# _zoneinfo.py -- pure-Python implementation of the zoneinfo module.
  1. import bisect
  2. import calendar
  3. import collections
  4. import functools
  5. import re
  6. import weakref
  7. from datetime import datetime, timedelta, tzinfo
  8. from . import _common, _tzpath
  9. EPOCH = datetime(1970, 1, 1)
  10. EPOCHORDINAL = datetime(1970, 1, 1).toordinal()
# It is relatively expensive to construct new timedelta objects, and in most
# cases we're looking at the same deltas, like integer numbers of hours, etc.
# To improve speed and memory use, we'll keep a dictionary with references
# to the ones we've already used so far.
#
# Loading every time zone in the 2020a version of the time zone database
# requires 447 timedeltas, which requires approximately the amount of space
# that ZoneInfo("America/New_York") with 236 transitions takes up, so we will
# set the cache size to 512 so that in the common case we always get cache
# hits, but specifically crafted ZoneInfo objects don't leak arbitrary amounts
# of memory.
@functools.lru_cache(maxsize=512)
def _load_timedelta(seconds):
    """Return a cached timedelta for the given number of seconds."""
    return timedelta(seconds=seconds)
class ZoneInfo(tzinfo):
    """A concrete tzinfo implementation backed by IANA (TZif) time zone data.

    ``ZoneInfo(key)`` looks the key up in a two-level cache (a bounded
    "strong" LRU cache on top of a weak-value cache) before constructing a
    new instance; ``no_cache`` and ``from_file`` bypass the caches.
    """

    # Maximum number of instances kept alive by the "strong" LRU cache.
    _strong_cache_size = 8
    # Most-recently-used instances, kept alive by strong references.
    _strong_cache = collections.OrderedDict()
    # key -> instance; entries vanish once no strong references remain.
    _weak_cache = weakref.WeakValueDictionary()
    __module__ = "zoneinfo"

    def __init_subclass__(cls):
        # Give every subclass independent caches so subclass instances are
        # never mixed with (or evicted by) the parent class's instances.
        cls._strong_cache = collections.OrderedDict()
        cls._weak_cache = weakref.WeakValueDictionary()

    def __new__(cls, key):
        """Return the instance for ``key``, creating and caching it if needed."""
        instance = cls._weak_cache.get(key, None)
        if instance is None:
            # setdefault: if another instance was stored for this key in the
            # meantime, the stored one wins and the fresh one is discarded.
            instance = cls._weak_cache.setdefault(key, cls._new_instance(key))
            instance._from_cache = True

        # Update the "strong" cache: (re-)insert the key at the MRU end.
        cls._strong_cache[key] = cls._strong_cache.pop(key, instance)

        if len(cls._strong_cache) > cls._strong_cache_size:
            # Evict the least-recently-used entry.
            cls._strong_cache.popitem(last=False)

        return instance

    @classmethod
    def no_cache(cls, key):
        """Construct a new instance for ``key``, bypassing both caches."""
        obj = cls._new_instance(key)
        obj._from_cache = False

        return obj

    @classmethod
    def _new_instance(cls, key):
        """Load and return a fresh instance for ``key`` (no caching).

        Prefers a data file found via ``_find_tzfile``; otherwise falls back
        to ``_common.load_tzdata(key)``.
        """
        obj = super().__new__(cls)
        obj._key = key
        obj._file_path = obj._find_tzfile(key)

        if obj._file_path is not None:
            file_obj = open(obj._file_path, "rb")
        else:
            file_obj = _common.load_tzdata(key)

        with file_obj as f:
            obj._load_file(f)

        return obj

    @classmethod
    def from_file(cls, fobj, /, key=None):
        """Construct a zone directly from an open TZif file object.

        The result has no file path, is not cached and cannot be pickled.
        """
        obj = super().__new__(cls)
        obj._key = key
        obj._file_path = None
        obj._load_file(fobj)
        obj._file_repr = repr(fobj)

        # Disable pickling for objects created from files
        obj.__reduce__ = obj._file_reduce

        return obj

    @classmethod
    def clear_cache(cls, *, only_keys=None):
        """Clear both caches entirely, or only the entries in ``only_keys``."""
        if only_keys is not None:
            for key in only_keys:
                cls._weak_cache.pop(key, None)
                cls._strong_cache.pop(key, None)
        else:
            cls._weak_cache.clear()
            cls._strong_cache.clear()

    @property
    def key(self):
        """The key this zone was constructed from (may be None for from_file)."""
        return self._key

    def utcoffset(self, dt):
        """Return the UTC offset in effect at ``dt`` as a timedelta."""
        return self._find_trans(dt).utcoff

    def dst(self, dt):
        """Return the DST component of the offset in effect at ``dt``."""
        return self._find_trans(dt).dstoff

    def tzname(self, dt):
        """Return the time zone abbreviation in effect at ``dt``."""
        return self._find_trans(dt).tzname

    def fromutc(self, dt):
        """Convert from datetime in UTC to datetime in local time"""
        if not isinstance(dt, datetime):
            raise TypeError("fromutc() requires a datetime argument")
        if dt.tzinfo is not self:
            raise ValueError("dt.tzinfo is not self")

        timestamp = self._get_local_timestamp(dt)
        num_trans = len(self._trans_utc)

        if num_trans >= 1 and timestamp < self._trans_utc[0]:
            # Before the first transition: the zone's initial offset applies.
            tti = self._tti_before
            fold = 0
        elif (
            num_trans == 0 or timestamp > self._trans_utc[-1]
        ) and not isinstance(self._tz_after, _ttinfo):
            # Past the last explicit transition with a POSIX TZ rule: let the
            # rule compute both the offset and the fold.
            tti, fold = self._tz_after.get_trans_info_fromutc(
                timestamp, dt.year
            )
        elif num_trans == 0:
            # No transitions at all; a single fixed fallback ttinfo applies.
            tti = self._tz_after
            fold = 0
        else:
            idx = bisect.bisect_right(self._trans_utc, timestamp)

            # Select the ttinfos in effect before and after the transition
            # that ``idx`` points just past.
            if num_trans > 1 and timestamp >= self._trans_utc[1]:
                tti_prev, tti = self._ttinfos[idx - 2 : idx]
            elif timestamp > self._trans_utc[-1]:
                tti_prev = self._ttinfos[-1]
                tti = self._tz_after
            else:
                tti_prev = self._tti_before
                tti = self._ttinfos[0]

            # Detect fold: if the previous offset was larger, local clocks
            # were set back, and the first ``shift`` seconds after the
            # transition occur twice in local time.
            shift = tti_prev.utcoff - tti.utcoff
            fold = shift.total_seconds() > timestamp - self._trans_utc[idx - 1]
        dt += tti.utcoff
        if fold:
            return dt.replace(fold=1)
        else:
            return dt

    def _find_trans(self, dt):
        """Return the _ttinfo in effect at local datetime ``dt``.

        ``dt`` may be None (as when utcoffset(None) is queried); in that
        case the single fixed offset is returned when the zone has one,
        otherwise a ttinfo of Nones.
        """
        if dt is None:
            if self._fixed_offset:
                return self._tz_after
            else:
                return _NO_TTINFO

        ts = self._get_local_timestamp(dt)

        # Local transition times differ depending on which side of a fold we
        # are on, so there is one sorted list per value of dt.fold.
        lt = self._trans_local[dt.fold]

        num_trans = len(lt)

        if num_trans and ts < lt[0]:
            return self._tti_before
        elif not num_trans or ts > lt[-1]:
            if isinstance(self._tz_after, _TZStr):
                return self._tz_after.get_trans_info(ts, dt.year, dt.fold)
            else:
                return self._tz_after
        else:
            # idx is the transition that occurs after this timestamp, so we
            # subtract off 1 to get the current ttinfo
            idx = bisect.bisect_right(lt, ts) - 1
            assert idx >= 0
            return self._ttinfos[idx]

    def _get_local_timestamp(self, dt):
        """Seconds since 1970-01-01 of the naive wall-clock time in ``dt``
        (tzinfo and sub-second components are deliberately ignored)."""
        return (
            (dt.toordinal() - EPOCHORDINAL) * 86400
            + dt.hour * 3600
            + dt.minute * 60
            + dt.second
        )

    def __str__(self):
        """Return the key if one is set, otherwise fall back to repr()."""
        if self._key is not None:
            return f"{self._key}"
        else:
            return repr(self)

    def __repr__(self):
        if self._key is not None:
            return f"{self.__class__.__name__}(key={self._key!r})"
        else:
            return f"{self.__class__.__name__}.from_file({self._file_repr})"

    def __reduce__(self):
        # Pickle by key; _from_cache records whether unpickling should go
        # through the cache or bypass it.
        return (self.__class__._unpickle, (self._key, self._from_cache))

    def _file_reduce(self):
        # Bound to __reduce__ on instances created by from_file.
        import pickle

        raise pickle.PicklingError(
            "Cannot pickle a ZoneInfo file created from a file stream."
        )

    @classmethod
    def _unpickle(cls, key, from_cache, /):
        """Reconstruct an instance from pickled state."""
        if from_cache:
            return cls(key)
        else:
            return cls.no_cache(key)

    def _find_tzfile(self, key):
        """Return the path of ``key``'s data file on the search path, or None."""
        return _tzpath.find_tzfile(key)

    def _load_file(self, fobj):
        """Parse TZif data from ``fobj`` and populate the instance state."""
        # Retrieve all the data as it exists in the zoneinfo file
        trans_idx, trans_utc, utcoff, isdst, abbr, tz_str = _common.load_data(
            fobj
        )

        # Infer the DST offsets (needed for .dst()) from the data
        dstoff = self._utcoff_to_dstoff(trans_idx, utcoff, isdst)

        # Convert all the transition times (UTC) into "seconds since 1970-01-01 local time"
        trans_local = self._ts_to_local(trans_idx, trans_utc, utcoff)

        # Construct `_ttinfo` objects for each transition in the file
        _ttinfo_list = [
            _ttinfo(
                _load_timedelta(utcoffset), _load_timedelta(dstoffset), tzname
            )
            for utcoffset, dstoffset, tzname in zip(utcoff, dstoff, abbr)
        ]

        self._trans_utc = trans_utc
        self._trans_local = trans_local
        self._ttinfos = [_ttinfo_list[idx] for idx in trans_idx]

        # Find the first non-DST transition
        for i in range(len(isdst)):
            if not isdst[i]:
                self._tti_before = _ttinfo_list[i]
                break
        else:
            if self._ttinfos:
                self._tti_before = self._ttinfos[0]
            else:
                self._tti_before = None

        # Set the "fallback" time zone used after the last explicit transition
        if tz_str is not None and tz_str != b"":
            self._tz_after = _parse_tz_str(tz_str.decode())
        else:
            if not self._ttinfos and not _ttinfo_list:
                raise ValueError("No time zone information found.")

            if self._ttinfos:
                self._tz_after = self._ttinfos[-1]
            else:
                self._tz_after = _ttinfo_list[-1]

        # Determine if this is a "fixed offset" zone, meaning that the output
        # of the utcoffset, dst and tzname functions does not depend on the
        # specific datetime passed.
        #
        # We make three simplifying assumptions here:
        #
        # 1. If _tz_after is not a _ttinfo, it has transitions that might
        #    actually occur (it is possible to construct TZ strings that
        #    specify STD and DST but no transitions ever occur, such as
        #    AAA0BBB,0/0,J365/25).
        # 2. If _ttinfo_list contains more than one _ttinfo object, the objects
        #    represent different offsets.
        # 3. _ttinfo_list contains no unused _ttinfos (in which case an
        #    otherwise fixed-offset zone with extra _ttinfos defined may
        #    appear to *not* be a fixed offset zone).
        #
        # Violations to these assumptions would be fairly exotic, and exotic
        # zones should almost certainly not be used with datetime.time (the
        # only thing that would be affected by this).
        if len(_ttinfo_list) > 1 or not isinstance(self._tz_after, _ttinfo):
            self._fixed_offset = False
        elif not _ttinfo_list:
            self._fixed_offset = True
        else:
            self._fixed_offset = _ttinfo_list[0] == self._tz_after

    @staticmethod
    def _utcoff_to_dstoff(trans_idx, utcoffsets, isdsts):
        """Infer a DST offset (in seconds) for each ttinfo index.

        Now we must transform our ttis and abbrs into `_ttinfo` objects,
        but there is an issue: .dst() must return a timedelta with the
        difference between utcoffset() and the "standard" offset, but
        the "base offset" and "DST offset" are not encoded in the file;
        we can infer what they are from the isdst flag, but it is not
        sufficient to just look at the last standard offset, because
        occasionally countries will shift both DST offset and base offset.
        """
        typecnt = len(isdsts)
        dstoffs = [0] * typecnt  # Provisionally assign all to 0.

        dst_cnt = sum(isdsts)
        dst_found = 0

        for i in range(1, len(trans_idx)):
            # Stop early once every DST ttinfo has an offset assigned.
            if dst_cnt == dst_found:
                break

            idx = trans_idx[i]

            dst = isdsts[idx]

            # We're only going to look at daylight saving time
            if not dst:
                continue

            # Skip any offsets that have already been assigned
            if dstoffs[idx] != 0:
                continue

            dstoff = 0
            utcoff = utcoffsets[idx]

            # Compare against the adjacent standard-time offsets to recover
            # the DST delta.
            comp_idx = trans_idx[i - 1]

            if not isdsts[comp_idx]:
                dstoff = utcoff - utcoffsets[comp_idx]

            if not dstoff and idx < (typecnt - 1):
                comp_idx = trans_idx[i + 1]

                # If the following transition is also DST and we couldn't
                # find the DST offset by this point, we're going to have to
                # skip it and hope this transition gets assigned later
                if isdsts[comp_idx]:
                    continue

                dstoff = utcoff - utcoffsets[comp_idx]

            if dstoff:
                dst_found += 1
                dstoffs[idx] = dstoff
        else:
            # If we didn't find a valid value for a given index, we'll end up
            # with dstoff = 0 for something where `isdst=1`. This is obviously
            # wrong - one hour will be a much better guess than 0
            for idx in range(typecnt):
                if not dstoffs[idx] and isdsts[idx]:
                    dstoffs[idx] = 3600

        return dstoffs

    @staticmethod
    def _ts_to_local(trans_idx, trans_list_utc, utcoffsets):
        """Generate number of seconds since 1970 *in the local time*.

        This is necessary to easily find the transition times in local time.
        Returns one sorted list per fold value: index 0 uses the larger of
        the two offsets surrounding each transition, index 1 the smaller.
        """
        if not trans_list_utc:
            return [[], []]

        # Start with the timestamps and modify in-place
        trans_list_wall = [list(trans_list_utc), list(trans_list_utc)]

        if len(utcoffsets) > 1:
            offset_0 = utcoffsets[0]
            offset_1 = utcoffsets[trans_idx[0]]
            # Keep the larger offset in offset_0 (the fold=0 list).
            if offset_1 > offset_0:
                offset_1, offset_0 = offset_0, offset_1
        else:
            offset_0 = offset_1 = utcoffsets[0]

        trans_list_wall[0][0] += offset_0
        trans_list_wall[1][0] += offset_1

        for i in range(1, len(trans_idx)):
            offset_0 = utcoffsets[trans_idx[i - 1]]
            offset_1 = utcoffsets[trans_idx[i]]

            if offset_1 > offset_0:
                offset_1, offset_0 = offset_0, offset_1

            trans_list_wall[0][i] += offset_0
            trans_list_wall[1][i] += offset_1

        return trans_list_wall
  317. class _ttinfo:
  318. __slots__ = ["utcoff", "dstoff", "tzname"]
  319. def __init__(self, utcoff, dstoff, tzname):
  320. self.utcoff = utcoff
  321. self.dstoff = dstoff
  322. self.tzname = tzname
  323. def __eq__(self, other):
  324. return (
  325. self.utcoff == other.utcoff
  326. and self.dstoff == other.dstoff
  327. and self.tzname == other.tzname
  328. )
  329. def __repr__(self): # pragma: nocover
  330. return (
  331. f"{self.__class__.__name__}"
  332. + f"({self.utcoff}, {self.dstoff}, {self.tzname})"
  333. )
  334. _NO_TTINFO = _ttinfo(None, None, None)
class _TZStr:
    """A POSIX TZ-string rule: a standard offset, a DST offset and the
    start/end rules of the DST period. Used as the "fallback" zone for
    datetimes beyond the last explicit transition."""

    __slots__ = (
        "std",
        "dst",
        "start",
        "end",
        "get_trans_info",
        "get_trans_info_fromutc",
        "dst_diff",
    )

    def __init__(
        self, std_abbr, std_offset, dst_abbr, dst_offset, start=None, end=None
    ):
        # Offsets arrive as integer seconds; dst_diff is the signed DST shift.
        self.dst_diff = dst_offset - std_offset
        std_offset = _load_timedelta(std_offset)
        self.std = _ttinfo(
            utcoff=std_offset, dstoff=_load_timedelta(0), tzname=std_abbr
        )

        self.start = start
        self.end = end

        dst_offset = _load_timedelta(dst_offset)
        delta = _load_timedelta(self.dst_diff)
        self.dst = _ttinfo(utcoff=dst_offset, dstoff=delta, tzname=dst_abbr)

        # These are assertions because the constructor should only be called
        # by functions that would fail before passing start or end
        assert start is not None, "No transition start specified"
        assert end is not None, "No transition end specified"

        self.get_trans_info = self._get_trans_info
        self.get_trans_info_fromutc = self._get_trans_info_fromutc

    def transitions(self, year):
        """Return the (start, end) of DST for ``year``, in local "epoch" seconds."""
        start = self.start.year_to_epoch(year)
        end = self.end.year_to_epoch(year)
        return start, end

    def _get_trans_info(self, ts, year, fold):
        """Get the information about the current transition - tti"""
        start, end = self.transitions(year)

        # With fold = 0, the period (denominated in local time) with the
        # smaller offset starts at the end of the gap and ends at the end of
        # the fold; with fold = 1, it runs from the start of the gap to the
        # beginning of the fold.
        #
        # So in order to determine the DST boundaries we need to know both
        # the fold and whether DST is positive or negative (rare), and it
        # turns out that this boils down to fold XOR is_positive.
        if fold == (self.dst_diff >= 0):
            end -= self.dst_diff
        else:
            start += self.dst_diff

        if start < end:
            isdst = start <= ts < end
        else:
            # The DST period wraps around the end of the year.
            isdst = not (end <= ts < start)

        return self.dst if isdst else self.std

    def _get_trans_info_fromutc(self, ts, year):
        """Like _get_trans_info but for a UTC timestamp; also returns
        whether ``ts`` falls in the ambiguous (fold) period."""
        start, end = self.transitions(year)
        # transitions() is expressed in local time, so shift each boundary
        # back by the offset in effect just before it to compare with UTC.
        start -= self.std.utcoff.total_seconds()
        end -= self.dst.utcoff.total_seconds()

        if start < end:
            isdst = start <= ts < end
        else:
            isdst = not (end <= ts < start)

        # For positive DST, the ambiguous period is one dst_diff after the end
        # of DST; for negative DST, the ambiguous period is one dst_diff before
        # the start of DST.
        if self.dst_diff > 0:
            ambig_start = end
            ambig_end = end + self.dst_diff
        else:
            ambig_start = start
            ambig_end = start - self.dst_diff

        fold = ambig_start <= ts < ambig_end

        return (self.dst if isdst else self.std, fold)
  407. def _post_epoch_days_before_year(year):
  408. """Get the number of days between 1970-01-01 and YEAR-01-01"""
  409. y = year - 1
  410. return y * 365 + y // 4 - y // 100 + y // 400 - EPOCHORDINAL
  411. class _DayOffset:
  412. __slots__ = ["d", "julian", "hour", "minute", "second"]
  413. def __init__(self, d, julian, hour=2, minute=0, second=0):
  414. min_day = 0 + julian # convert bool to int
  415. if not min_day <= d <= 365:
  416. raise ValueError(f"d must be in [{min_day}, 365], not: {d}")
  417. self.d = d
  418. self.julian = julian
  419. self.hour = hour
  420. self.minute = minute
  421. self.second = second
  422. def year_to_epoch(self, year):
  423. days_before_year = _post_epoch_days_before_year(year)
  424. d = self.d
  425. if self.julian and d >= 59 and calendar.isleap(year):
  426. d += 1
  427. epoch = (days_before_year + d) * 86400
  428. epoch += self.hour * 3600 + self.minute * 60 + self.second
  429. return epoch
class _CalendarOffset:
    """A DST transition rule given in "M m.w.d" (month, week, weekday)
    POSIX TZ form, plus a transition time of day (default 02:00:00).

    ``m`` is the month [1, 12], ``w`` the week of the month [1, 5] (5 means
    "last"), and ``d`` the day of the week [0, 6] with 0 = Sunday.
    """

    __slots__ = ["m", "w", "d", "hour", "minute", "second"]

    # Cumulative day counts before each month (1-based index; no leap day).
    _DAYS_BEFORE_MONTH = (
        -1,
        0,
        31,
        59,
        90,
        120,
        151,
        181,
        212,
        243,
        273,
        304,
        334,
    )

    def __init__(self, m, w, d, hour=2, minute=0, second=0):
        if not 1 <= m <= 12:
            raise ValueError("m must be in [1, 12]")

        if not 1 <= w <= 5:
            raise ValueError("w must be in [1, 5]")

        if not 0 <= d <= 6:
            raise ValueError("d must be in [0, 6]")

        self.m = m
        self.w = w
        self.d = d
        self.hour = hour
        self.minute = minute
        self.second = second

    @classmethod
    def _ymd2ord(cls, year, month, day):
        """Days between 1970-01-01 and the given (local) calendar date."""
        return (
            _post_epoch_days_before_year(year)
            + cls._DAYS_BEFORE_MONTH[month]
            # bool-as-int: add the leap day once past February.
            + (month > 2 and calendar.isleap(year))
            + day
        )

    # TODO: These are not actually epoch dates as they are expressed in local time
    def year_to_epoch(self, year):
        """Calculates the datetime of the occurrence from the year"""
        # We know year and month, we need to convert w, d into day of month
        #
        # Week 1 is the first week in which day `d` (where 0 = Sunday) appears.
        # Week 5 represents the last occurrence of day `d`, so we need to know
        # the range of the month.
        first_day, days_in_month = calendar.monthrange(year, self.m)

        # This equation seems magical, so I'll break it down:
        # 1. calendar says 0 = Monday, POSIX says 0 = Sunday
        #    so we need first_day + 1 to get 1 = Monday -> 7 = Sunday,
        #    which is still equivalent because this math is mod 7
        # 2. Get first day - desired day mod 7: -1 % 7 = 6, so we don't need
        #    to do anything to adjust negative numbers.
        # 3. Add 1 because month days are a 1-based index.
        month_day = (self.d - (first_day + 1)) % 7 + 1

        # Now use a 0-based index version of `w` to calculate the w-th
        # occurrence of `d`
        month_day += (self.w - 1) * 7

        # month_day will only be > days_in_month if w was 5, and `w` means
        # "last occurrence of `d`", so now we just check if we over-shot the
        # end of the month and if so knock off 1 week.
        if month_day > days_in_month:
            month_day -= 7

        ordinal = self._ymd2ord(year, self.m, month_day)
        epoch = ordinal * 86400
        epoch += self.hour * 3600 + self.minute * 60 + self.second
        return epoch
def _parse_tz_str(tz_str):
    """Parse a POSIX TZ string into a _TZStr (with DST rules) or, for a
    fixed-offset string with no DST, a single _ttinfo.

    Raises ValueError for malformed strings.
    """
    # The tz string has the format:
    #
    # std[offset[dst[offset],start[/time],end[/time]]]
    #
    # std and dst must be 3 or more characters long and must not contain
    # a leading colon, embedded digits, commas, nor a plus or minus signs;
    # The spaces between "std" and "offset" are only for display and are
    # not actually present in the string.
    #
    # The format of the offset is ``[+|-]hh[:mm[:ss]]``
    offset_str, *start_end_str = tz_str.split(",", 1)

    parser_re = re.compile(
        r"""
        (?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
        (?:
            (?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
            (?:
                (?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
                (?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
            )? # dst
        )? # stdoff
        """,
        re.ASCII|re.VERBOSE
    )

    m = parser_re.fullmatch(offset_str)

    if m is None:
        raise ValueError(f"{tz_str} is not a valid TZ string")

    std_abbr = m.group("std")
    dst_abbr = m.group("dst")
    dst_offset = None

    # Quoted abbreviations (e.g. <+0330>) are written inside angle brackets.
    std_abbr = std_abbr.strip("<>")
    if dst_abbr:
        dst_abbr = dst_abbr.strip("<>")

    if std_offset := m.group("stdoff"):
        try:
            std_offset = _parse_tz_delta(std_offset)
        except ValueError as e:
            raise ValueError(f"Invalid STD offset in {tz_str}") from e
    else:
        # No offset given: standard time is UTC.
        std_offset = 0

    if dst_abbr is not None:
        if dst_offset := m.group("dstoff"):
            try:
                dst_offset = _parse_tz_delta(dst_offset)
            except ValueError as e:
                raise ValueError(f"Invalid DST offset in {tz_str}") from e
        else:
            # Default DST offset is one hour ahead of standard time.
            dst_offset = std_offset + 3600

        if not start_end_str:
            raise ValueError(f"Missing transition rules: {tz_str}")

        start_end_strs = start_end_str[0].split(",", 1)
        try:
            start, end = (_parse_dst_start_end(x) for x in start_end_strs)
        except ValueError as e:
            raise ValueError(f"Invalid TZ string: {tz_str}") from e

        return _TZStr(std_abbr, std_offset, dst_abbr, dst_offset, start, end)
    elif start_end_str:
        raise ValueError(f"Transition rule present without DST: {tz_str}")
    else:
        # This is a static ttinfo, don't return _TZStr
        return _ttinfo(
            _load_timedelta(std_offset), _load_timedelta(0), std_abbr
        )
  561. def _parse_dst_start_end(dststr):
  562. date, *time = dststr.split("/", 1)
  563. type = date[:1]
  564. if type == "M":
  565. n_is_julian = False
  566. m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
  567. if m is None:
  568. raise ValueError(f"Invalid dst start/end date: {dststr}")
  569. date_offset = tuple(map(int, m.groups()))
  570. offset = _CalendarOffset(*date_offset)
  571. else:
  572. if type == "J":
  573. n_is_julian = True
  574. date = date[1:]
  575. else:
  576. n_is_julian = False
  577. doy = int(date)
  578. offset = _DayOffset(doy, n_is_julian)
  579. if time:
  580. offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])
  581. return offset
  582. def _parse_transition_time(time_str):
  583. match = re.fullmatch(
  584. r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
  585. time_str,
  586. re.ASCII
  587. )
  588. if match is None:
  589. raise ValueError(f"Invalid time: {time_str}")
  590. h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
  591. if h > 167:
  592. raise ValueError(
  593. f"Hour must be in [0, 167]: {time_str}"
  594. )
  595. if match.group("sign") == "-":
  596. h, m, s = -h, -m, -s
  597. return h, m, s
  598. def _parse_tz_delta(tz_delta):
  599. match = re.fullmatch(
  600. r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
  601. tz_delta,
  602. re.ASCII
  603. )
  604. # Anything passed to this function should already have hit an equivalent
  605. # regular expression to find the section to parse.
  606. assert match is not None, tz_delta
  607. h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
  608. total = h * 3600 + m * 60 + s
  609. if h > 24:
  610. raise ValueError(
  611. f"Offset hours must be in [0, 24]: {tz_delta}"
  612. )
  613. # Yes, +5 maps to an offset of -5h
  614. if match.group("sign") != "-":
  615. total = -total
  616. return total