test_parser.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import itertools
  4. from datetime import datetime, timedelta
  5. import unittest
  6. import sys
  7. from dateutil import tz
  8. from dateutil.tz import tzoffset
  9. from dateutil.parser import parse, parserinfo
  10. from dateutil.parser import ParserError
  11. from dateutil.parser import UnknownTimezoneWarning
  12. from ._common import TZEnvContext
  13. from six import assertRaisesRegex, PY2
  14. from io import StringIO
  15. import pytest
  16. # Platform info
  17. IS_WIN = sys.platform.startswith('win')
  18. PLATFORM_HAS_DASH_D = False
  19. try:
  20. if datetime.now().strftime('%-d'):
  21. PLATFORM_HAS_DASH_D = True
  22. except ValueError:
  23. pass
  24. @pytest.fixture(params=[True, False])
  25. def fuzzy(request):
  26. """Fixture to pass fuzzy=True or fuzzy=False to parse"""
  27. return request.param
  28. # Parser test cases using no keyword arguments. Format: (parsable_text, expected_datetime, assertion_message)
  29. PARSER_TEST_CASES = [
  30. ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  31. ("Thu Sep 25 2003", datetime(2003, 9, 25), "date command format strip"),
  32. ("2003-09-25T10:49:41", datetime(2003, 9, 25, 10, 49, 41), "iso format strip"),
  33. ("2003-09-25T10:49", datetime(2003, 9, 25, 10, 49), "iso format strip"),
  34. ("2003-09-25T10", datetime(2003, 9, 25, 10), "iso format strip"),
  35. ("2003-09-25", datetime(2003, 9, 25), "iso format strip"),
  36. ("20030925T104941", datetime(2003, 9, 25, 10, 49, 41), "iso stripped format strip"),
  37. ("20030925T1049", datetime(2003, 9, 25, 10, 49, 0), "iso stripped format strip"),
  38. ("20030925T10", datetime(2003, 9, 25, 10), "iso stripped format strip"),
  39. ("20030925", datetime(2003, 9, 25), "iso stripped format strip"),
  40. ("2003-09-25 10:49:41,502", datetime(2003, 9, 25, 10, 49, 41, 502000), "python logger format"),
  41. ("199709020908", datetime(1997, 9, 2, 9, 8), "no separator"),
  42. ("19970902090807", datetime(1997, 9, 2, 9, 8, 7), "no separator"),
  43. ("09-25-2003", datetime(2003, 9, 25), "date with dash"),
  44. ("25-09-2003", datetime(2003, 9, 25), "date with dash"),
  45. ("10-09-2003", datetime(2003, 10, 9), "date with dash"),
  46. ("10-09-03", datetime(2003, 10, 9), "date with dash"),
  47. ("2003.09.25", datetime(2003, 9, 25), "date with dot"),
  48. ("09.25.2003", datetime(2003, 9, 25), "date with dot"),
  49. ("25.09.2003", datetime(2003, 9, 25), "date with dot"),
  50. ("10.09.2003", datetime(2003, 10, 9), "date with dot"),
  51. ("10.09.03", datetime(2003, 10, 9), "date with dot"),
  52. ("2003/09/25", datetime(2003, 9, 25), "date with slash"),
  53. ("09/25/2003", datetime(2003, 9, 25), "date with slash"),
  54. ("25/09/2003", datetime(2003, 9, 25), "date with slash"),
  55. ("10/09/2003", datetime(2003, 10, 9), "date with slash"),
  56. ("10/09/03", datetime(2003, 10, 9), "date with slash"),
  57. ("2003 09 25", datetime(2003, 9, 25), "date with space"),
  58. ("09 25 2003", datetime(2003, 9, 25), "date with space"),
  59. ("25 09 2003", datetime(2003, 9, 25), "date with space"),
  60. ("10 09 2003", datetime(2003, 10, 9), "date with space"),
  61. ("10 09 03", datetime(2003, 10, 9), "date with space"),
  62. ("25 09 03", datetime(2003, 9, 25), "date with space"),
  63. ("03 25 Sep", datetime(2003, 9, 25), "strangely ordered date"),
  64. ("25 03 Sep", datetime(2025, 9, 3), "strangely ordered date"),
  65. (" July 4 , 1976 12:01:02 am ", datetime(1976, 7, 4, 0, 1, 2), "extra space"),
  66. ("Wed, July 10, '96", datetime(1996, 7, 10, 0, 0), "random format"),
  67. ("1996.July.10 AD 12:08 PM", datetime(1996, 7, 10, 12, 8), "random format"),
  68. ("July 4, 1976", datetime(1976, 7, 4), "random format"),
  69. ("7 4 1976", datetime(1976, 7, 4), "random format"),
  70. ("4 jul 1976", datetime(1976, 7, 4), "random format"),
  71. ("4 Jul 1976", datetime(1976, 7, 4), "'%-d %b %Y' format"),
  72. ("7-4-76", datetime(1976, 7, 4), "random format"),
  73. ("19760704", datetime(1976, 7, 4), "random format"),
  74. ("0:01:02 on July 4, 1976", datetime(1976, 7, 4, 0, 1, 2), "random format"),
  75. ("July 4, 1976 12:01:02 am", datetime(1976, 7, 4, 0, 1, 2), "random format"),
  76. ("Mon Jan 2 04:24:27 1995", datetime(1995, 1, 2, 4, 24, 27), "random format"),
  77. ("04.04.95 00:22", datetime(1995, 4, 4, 0, 22), "random format"),
  78. ("Jan 1 1999 11:23:34.578", datetime(1999, 1, 1, 11, 23, 34, 578000), "random format"),
  79. ("950404 122212", datetime(1995, 4, 4, 12, 22, 12), "random format"),
  80. ("3rd of May 2001", datetime(2001, 5, 3), "random format"),
  81. ("5th of March 2001", datetime(2001, 3, 5), "random format"),
  82. ("1st of May 2003", datetime(2003, 5, 1), "random format"),
  83. ('0099-01-01T00:00:00', datetime(99, 1, 1, 0, 0), "99 ad"),
  84. ('0031-01-01T00:00:00', datetime(31, 1, 1, 0, 0), "31 ad"),
  85. ("20080227T21:26:01.123456789", datetime(2008, 2, 27, 21, 26, 1, 123456), "high precision seconds"),
  86. ('13NOV2017', datetime(2017, 11, 13), "dBY (See GH360)"),
  87. ('0003-03-04', datetime(3, 3, 4), "pre 12 year same month (See GH PR #293)"),
  88. ('December.0031.30', datetime(31, 12, 30), "BYd corner case (GH#687)"),
  89. # Cases with legacy h/m/s format, candidates for deprecation (GH#886)
  90. ("2016-12-21 04.2h", datetime(2016, 12, 21, 4, 12), "Fractional Hours"),
  91. ]
  92. # Check that we don't have any duplicates
  93. assert len(set([x[0] for x in PARSER_TEST_CASES])) == len(PARSER_TEST_CASES)
  94. @pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_TEST_CASES)
  95. def test_parser(parsable_text, expected_datetime, assertion_message):
  96. assert parse(parsable_text) == expected_datetime, assertion_message
  97. # Parser test cases using datetime(2003, 9, 25) as a default.
  98. # Format: (parsable_text, expected_datetime, assertion_message)
  99. PARSER_DEFAULT_TEST_CASES = [
  100. ("Thu Sep 25 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  101. ("Thu Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  102. ("Thu 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  103. ("Sep 10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  104. ("10:36:28", datetime(2003, 9, 25, 10, 36, 28), "date command format strip"),
  105. ("10:36", datetime(2003, 9, 25, 10, 36), "date command format strip"),
  106. ("Sep 2003", datetime(2003, 9, 25), "date command format strip"),
  107. ("Sep", datetime(2003, 9, 25), "date command format strip"),
  108. ("2003", datetime(2003, 9, 25), "date command format strip"),
  109. ("10h36m28.5s", datetime(2003, 9, 25, 10, 36, 28, 500000), "hour with letters"),
  110. ("10h36m28s", datetime(2003, 9, 25, 10, 36, 28), "hour with letters strip"),
  111. ("10h36m", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
  112. ("10h", datetime(2003, 9, 25, 10), "hour with letters strip"),
  113. ("10 h 36", datetime(2003, 9, 25, 10, 36), "hour with letters strip"),
  114. ("10 h 36.5", datetime(2003, 9, 25, 10, 36, 30), "hour with letter strip"),
  115. ("36 m 5", datetime(2003, 9, 25, 0, 36, 5), "hour with letters spaces"),
  116. ("36 m 5 s", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
  117. ("36 m 05", datetime(2003, 9, 25, 0, 36, 5), "minute with letters spaces"),
  118. ("36 m 05 s", datetime(2003, 9, 25, 0, 36, 5), "minutes with letters spaces"),
  119. ("10h am", datetime(2003, 9, 25, 10), "hour am pm"),
  120. ("10h pm", datetime(2003, 9, 25, 22), "hour am pm"),
  121. ("10am", datetime(2003, 9, 25, 10), "hour am pm"),
  122. ("10pm", datetime(2003, 9, 25, 22), "hour am pm"),
  123. ("10:00 am", datetime(2003, 9, 25, 10), "hour am pm"),
  124. ("10:00 pm", datetime(2003, 9, 25, 22), "hour am pm"),
  125. ("10:00am", datetime(2003, 9, 25, 10), "hour am pm"),
  126. ("10:00pm", datetime(2003, 9, 25, 22), "hour am pm"),
  127. ("10:00a.m", datetime(2003, 9, 25, 10), "hour am pm"),
  128. ("10:00p.m", datetime(2003, 9, 25, 22), "hour am pm"),
  129. ("10:00a.m.", datetime(2003, 9, 25, 10), "hour am pm"),
  130. ("10:00p.m.", datetime(2003, 9, 25, 22), "hour am pm"),
  131. ("Wed", datetime(2003, 10, 1), "weekday alone"),
  132. ("Wednesday", datetime(2003, 10, 1), "long weekday"),
  133. ("October", datetime(2003, 10, 25), "long month"),
  134. ("31-Dec-00", datetime(2000, 12, 31), "zero year"),
  135. ("0:01:02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
  136. ("12h 01m02s am", datetime(2003, 9, 25, 0, 1, 2), "random format"),
  137. ("12:08 PM", datetime(2003, 9, 25, 12, 8), "random format"),
  138. ("01h02m03", datetime(2003, 9, 25, 1, 2, 3), "random format"),
  139. ("01h02", datetime(2003, 9, 25, 1, 2), "random format"),
  140. ("01h02s", datetime(2003, 9, 25, 1, 0, 2), "random format"),
  141. ("01m02", datetime(2003, 9, 25, 0, 1, 2), "random format"),
  142. ("01m02h", datetime(2003, 9, 25, 2, 1), "random format"),
  143. ("2004 10 Apr 11h30m", datetime(2004, 4, 10, 11, 30), "random format")
  144. ]
  145. # Check that we don't have any duplicates
  146. assert len(set([x[0] for x in PARSER_DEFAULT_TEST_CASES])) == len(PARSER_DEFAULT_TEST_CASES)
  147. @pytest.mark.parametrize("parsable_text,expected_datetime,assertion_message", PARSER_DEFAULT_TEST_CASES)
  148. def test_parser_default(parsable_text, expected_datetime, assertion_message):
  149. assert parse(parsable_text, default=datetime(2003, 9, 25)) == expected_datetime, assertion_message
  150. @pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
  151. def test_parse_dayfirst(sep):
  152. expected = datetime(2003, 9, 10)
  153. fmt = sep.join(['%d', '%m', '%Y'])
  154. dstr = expected.strftime(fmt)
  155. result = parse(dstr, dayfirst=True)
  156. assert result == expected
  157. @pytest.mark.parametrize('sep', ['-', '.', '/', ' '])
  158. def test_parse_yearfirst(sep):
  159. expected = datetime(2010, 9, 3)
  160. fmt = sep.join(['%Y', '%m', '%d'])
  161. dstr = expected.strftime(fmt)
  162. result = parse(dstr, yearfirst=True)
  163. assert result == expected
  164. @pytest.mark.parametrize('dstr,expected', [
  165. ("Thu Sep 25 10:36:28 BRST 2003", datetime(2003, 9, 25, 10, 36, 28)),
  166. ("1996.07.10 AD at 15:08:56 PDT", datetime(1996, 7, 10, 15, 8, 56)),
  167. ("Tuesday, April 12, 1952 AD 3:30:42pm PST",
  168. datetime(1952, 4, 12, 15, 30, 42)),
  169. ("November 5, 1994, 8:15:30 am EST", datetime(1994, 11, 5, 8, 15, 30)),
  170. ("1994-11-05T08:15:30-05:00", datetime(1994, 11, 5, 8, 15, 30)),
  171. ("1994-11-05T08:15:30Z", datetime(1994, 11, 5, 8, 15, 30)),
  172. ("1976-07-04T00:01:02Z", datetime(1976, 7, 4, 0, 1, 2)),
  173. ("1986-07-05T08:15:30z", datetime(1986, 7, 5, 8, 15, 30)),
  174. ("Tue Apr 4 00:22:12 PDT 1995", datetime(1995, 4, 4, 0, 22, 12)),
  175. ])
  176. def test_parse_ignoretz(dstr, expected):
  177. result = parse(dstr, ignoretz=True)
  178. assert result == expected
  179. _brsttz = tzoffset("BRST", -10800)
  180. @pytest.mark.parametrize('dstr,expected', [
  181. ("20030925T104941-0300",
  182. datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
  183. ("Thu, 25 Sep 2003 10:49:41 -0300",
  184. datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
  185. ("2003-09-25T10:49:41.5-03:00",
  186. datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
  187. ("2003-09-25T10:49:41-03:00",
  188. datetime(2003, 9, 25, 10, 49, 41, tzinfo=_brsttz)),
  189. ("20030925T104941.5-0300",
  190. datetime(2003, 9, 25, 10, 49, 41, 500000, tzinfo=_brsttz)),
  191. ])
  192. def test_parse_with_tzoffset(dstr, expected):
  193. # In these cases, we are _not_ passing a tzinfos arg
  194. result = parse(dstr)
  195. assert result == expected
  196. class TestFormat(object):
  197. def test_ybd(self):
  198. # If we have a 4-digit year, a non-numeric month (abbreviated or not),
  199. # and a day (1 or 2 digits), then there is no ambiguity as to which
  200. # token is a year/month/day. This holds regardless of what order the
  201. # terms are in and for each of the separators below.
  202. seps = ['-', ' ', '/', '.']
  203. year_tokens = ['%Y']
  204. month_tokens = ['%b', '%B']
  205. day_tokens = ['%d']
  206. if PLATFORM_HAS_DASH_D:
  207. day_tokens.append('%-d')
  208. prods = itertools.product(year_tokens, month_tokens, day_tokens)
  209. perms = [y for x in prods for y in itertools.permutations(x)]
  210. unambig_fmts = [sep.join(perm) for sep in seps for perm in perms]
  211. actual = datetime(2003, 9, 25)
  212. for fmt in unambig_fmts:
  213. dstr = actual.strftime(fmt)
  214. res = parse(dstr)
  215. assert res == actual
  216. # TODO: some redundancy with PARSER_TEST_CASES cases
  217. @pytest.mark.parametrize("fmt,dstr", [
  218. ("%a %b %d %Y", "Thu Sep 25 2003"),
  219. ("%b %d %Y", "Sep 25 2003"),
  220. ("%Y-%m-%d", "2003-09-25"),
  221. ("%Y%m%d", "20030925"),
  222. ("%Y-%b-%d", "2003-Sep-25"),
  223. ("%d-%b-%Y", "25-Sep-2003"),
  224. ("%b-%d-%Y", "Sep-25-2003"),
  225. ("%m-%d-%Y", "09-25-2003"),
  226. ("%d-%m-%Y", "25-09-2003"),
  227. ("%Y.%m.%d", "2003.09.25"),
  228. ("%Y.%b.%d", "2003.Sep.25"),
  229. ("%d.%b.%Y", "25.Sep.2003"),
  230. ("%b.%d.%Y", "Sep.25.2003"),
  231. ("%m.%d.%Y", "09.25.2003"),
  232. ("%d.%m.%Y", "25.09.2003"),
  233. ("%Y/%m/%d", "2003/09/25"),
  234. ("%Y/%b/%d", "2003/Sep/25"),
  235. ("%d/%b/%Y", "25/Sep/2003"),
  236. ("%b/%d/%Y", "Sep/25/2003"),
  237. ("%m/%d/%Y", "09/25/2003"),
  238. ("%d/%m/%Y", "25/09/2003"),
  239. ("%Y %m %d", "2003 09 25"),
  240. ("%Y %b %d", "2003 Sep 25"),
  241. ("%d %b %Y", "25 Sep 2003"),
  242. ("%m %d %Y", "09 25 2003"),
  243. ("%d %m %Y", "25 09 2003"),
  244. ("%y %d %b", "03 25 Sep",),
  245. ])
  246. def test_strftime_formats_2003Sep25(self, fmt, dstr):
  247. expected = datetime(2003, 9, 25)
  248. # First check that the format strings behave as expected
  249. # (not strictly necessary, but nice to have)
  250. assert expected.strftime(fmt) == dstr
  251. res = parse(dstr)
  252. assert res == expected
  253. class TestInputTypes(object):
  254. def test_empty_string_invalid(self):
  255. with pytest.raises(ParserError):
  256. parse('')
  257. def test_none_invalid(self):
  258. with pytest.raises(TypeError):
  259. parse(None)
  260. def test_int_invalid(self):
  261. with pytest.raises(TypeError):
  262. parse(13)
  263. def test_duck_typing(self):
  264. # We want to support arbitrary classes that implement the stream
  265. # interface.
  266. class StringPassThrough(object):
  267. def __init__(self, stream):
  268. self.stream = stream
  269. def read(self, *args, **kwargs):
  270. return self.stream.read(*args, **kwargs)
  271. dstr = StringPassThrough(StringIO('2014 January 19'))
  272. res = parse(dstr)
  273. expected = datetime(2014, 1, 19)
  274. assert res == expected
  275. def test_parse_stream(self):
  276. dstr = StringIO('2014 January 19')
  277. res = parse(dstr)
  278. expected = datetime(2014, 1, 19)
  279. assert res == expected
  280. def test_parse_str(self):
  281. # Parser should be able to handle bytestring and unicode
  282. uni_str = '2014-05-01 08:00:00'
  283. bytes_str = uni_str.encode()
  284. res = parse(bytes_str)
  285. expected = parse(uni_str)
  286. assert res == expected
  287. def test_parse_bytes(self):
  288. res = parse(b'2014 January 19')
  289. expected = datetime(2014, 1, 19)
  290. assert res == expected
  291. def test_parse_bytearray(self):
  292. # GH#417
  293. res = parse(bytearray(b'2014 January 19'))
  294. expected = datetime(2014, 1, 19)
  295. assert res == expected
  296. class TestTzinfoInputTypes(object):
  297. def assert_equal_same_tz(self, dt1, dt2):
  298. assert dt1 == dt2
  299. assert dt1.tzinfo is dt2.tzinfo
  300. def test_tzinfo_dict_could_return_none(self):
  301. dstr = "2017-02-03 12:40 BRST"
  302. result = parse(dstr, tzinfos={"BRST": None})
  303. expected = datetime(2017, 2, 3, 12, 40)
  304. self.assert_equal_same_tz(result, expected)
  305. def test_tzinfos_callable_could_return_none(self):
  306. dstr = "2017-02-03 12:40 BRST"
  307. result = parse(dstr, tzinfos=lambda *args: None)
  308. expected = datetime(2017, 2, 3, 12, 40)
  309. self.assert_equal_same_tz(result, expected)
  310. def test_invalid_tzinfo_input(self):
  311. dstr = "2014 January 19 09:00 UTC"
  312. # Pass an absurd tzinfos object
  313. tzinfos = {"UTC": ValueError}
  314. with pytest.raises(TypeError):
  315. parse(dstr, tzinfos=tzinfos)
  316. def test_valid_tzinfo_tzinfo_input(self):
  317. dstr = "2014 January 19 09:00 UTC"
  318. tzinfos = {"UTC": tz.UTC}
  319. expected = datetime(2014, 1, 19, 9, tzinfo=tz.UTC)
  320. res = parse(dstr, tzinfos=tzinfos)
  321. self.assert_equal_same_tz(res, expected)
  322. def test_valid_tzinfo_unicode_input(self):
  323. dstr = "2014 January 19 09:00 UTC"
  324. tzinfos = {u"UTC": u"UTC+0"}
  325. expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
  326. res = parse(dstr, tzinfos=tzinfos)
  327. self.assert_equal_same_tz(res, expected)
  328. def test_valid_tzinfo_callable_input(self):
  329. dstr = "2014 January 19 09:00 UTC"
  330. def tzinfos(*args, **kwargs):
  331. return u"UTC+0"
  332. expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzstr("UTC+0"))
  333. res = parse(dstr, tzinfos=tzinfos)
  334. self.assert_equal_same_tz(res, expected)
  335. def test_valid_tzinfo_int_input(self):
  336. dstr = "2014 January 19 09:00 UTC"
  337. tzinfos = {u"UTC": -28800}
  338. expected = datetime(2014, 1, 19, 9, tzinfo=tz.tzoffset(u"UTC", -28800))
  339. res = parse(dstr, tzinfos=tzinfos)
  340. self.assert_equal_same_tz(res, expected)
  341. class ParserTest(unittest.TestCase):
  342. @classmethod
  343. def setup_class(cls):
  344. cls.tzinfos = {"BRST": -10800}
  345. cls.brsttz = tzoffset("BRST", -10800)
  346. cls.default = datetime(2003, 9, 25)
  347. # Parser should be able to handle bytestring and unicode
  348. cls.uni_str = '2014-05-01 08:00:00'
  349. cls.str_str = cls.uni_str.encode()
  350. def testParserParseStr(self):
  351. from dateutil.parser import parser
  352. assert parser().parse(self.str_str) == parser().parse(self.uni_str)
  353. def testParseUnicodeWords(self):
  354. class rus_parserinfo(parserinfo):
  355. MONTHS = [("янв", "Январь"),
  356. ("фев", "Февраль"),
  357. ("мар", "Март"),
  358. ("апр", "Апрель"),
  359. ("май", "Май"),
  360. ("июн", "Июнь"),
  361. ("июл", "Июль"),
  362. ("авг", "Август"),
  363. ("сен", "Сентябрь"),
  364. ("окт", "Октябрь"),
  365. ("ноя", "Ноябрь"),
  366. ("дек", "Декабрь")]
  367. expected = datetime(2015, 9, 10, 10, 20)
  368. res = parse('10 Сентябрь 2015 10:20', parserinfo=rus_parserinfo())
  369. assert res == expected
  370. def testParseWithNulls(self):
  371. # This relies on the from __future__ import unicode_literals, because
  372. # explicitly specifying a unicode literal is a syntax error in Py 3.2
  373. # May want to switch to u'...' if we ever drop Python 3.2 support.
  374. pstring = '\x00\x00August 29, 1924'
  375. assert parse(pstring) == datetime(1924, 8, 29)
  376. def testDateCommandFormat(self):
  377. self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
  378. tzinfos=self.tzinfos),
  379. datetime(2003, 9, 25, 10, 36, 28,
  380. tzinfo=self.brsttz))
  381. def testDateCommandFormatReversed(self):
  382. self.assertEqual(parse("2003 10:36:28 BRST 25 Sep Thu",
  383. tzinfos=self.tzinfos),
  384. datetime(2003, 9, 25, 10, 36, 28,
  385. tzinfo=self.brsttz))
  386. def testDateCommandFormatWithLong(self):
  387. if PY2:
  388. self.assertEqual(parse("Thu Sep 25 10:36:28 BRST 2003",
  389. tzinfos={"BRST": long(-10800)}),
  390. datetime(2003, 9, 25, 10, 36, 28,
  391. tzinfo=self.brsttz))
  392. def testISOFormatStrip2(self):
  393. self.assertEqual(parse("2003-09-25T10:49:41+03:00"),
  394. datetime(2003, 9, 25, 10, 49, 41,
  395. tzinfo=tzoffset(None, 10800)))
  396. def testISOStrippedFormatStrip2(self):
  397. self.assertEqual(parse("20030925T104941+0300"),
  398. datetime(2003, 9, 25, 10, 49, 41,
  399. tzinfo=tzoffset(None, 10800)))
  400. def testAMPMNoHour(self):
  401. with pytest.raises(ParserError):
  402. parse("AM")
  403. with pytest.raises(ParserError):
  404. parse("Jan 20, 2015 PM")
  405. def testAMPMRange(self):
  406. with pytest.raises(ParserError):
  407. parse("13:44 AM")
  408. with pytest.raises(ParserError):
  409. parse("January 25, 1921 23:13 PM")
  410. def testPertain(self):
  411. self.assertEqual(parse("Sep 03", default=self.default),
  412. datetime(2003, 9, 3))
  413. self.assertEqual(parse("Sep of 03", default=self.default),
  414. datetime(2003, 9, 25))
  415. def testFuzzy(self):
  416. s = "Today is 25 of September of 2003, exactly " \
  417. "at 10:49:41 with timezone -03:00."
  418. self.assertEqual(parse(s, fuzzy=True),
  419. datetime(2003, 9, 25, 10, 49, 41,
  420. tzinfo=self.brsttz))
  421. def testFuzzyWithTokens(self):
  422. s1 = "Today is 25 of September of 2003, exactly " \
  423. "at 10:49:41 with timezone -03:00."
  424. self.assertEqual(parse(s1, fuzzy_with_tokens=True),
  425. (datetime(2003, 9, 25, 10, 49, 41,
  426. tzinfo=self.brsttz),
  427. ('Today is ', 'of ', ', exactly at ',
  428. ' with timezone ', '.')))
  429. s2 = "http://biz.yahoo.com/ipo/p/600221.html"
  430. self.assertEqual(parse(s2, fuzzy_with_tokens=True),
  431. (datetime(2060, 2, 21, 0, 0, 0),
  432. ('http://biz.yahoo.com/ipo/p/', '.html')))
  433. def testFuzzyAMPMProblem(self):
  434. # Sometimes fuzzy parsing results in AM/PM flag being set without
  435. # hours - if it's fuzzy it should ignore that.
  436. s1 = "I have a meeting on March 1, 1974."
  437. s2 = "On June 8th, 2020, I am going to be the first man on Mars"
  438. # Also don't want any erroneous AM or PMs changing the parsed time
  439. s3 = "Meet me at the AM/PM on Sunset at 3:00 AM on December 3rd, 2003"
  440. s4 = "Meet me at 3:00AM on December 3rd, 2003 at the AM/PM on Sunset"
  441. self.assertEqual(parse(s1, fuzzy=True), datetime(1974, 3, 1))
  442. self.assertEqual(parse(s2, fuzzy=True), datetime(2020, 6, 8))
  443. self.assertEqual(parse(s3, fuzzy=True), datetime(2003, 12, 3, 3))
  444. self.assertEqual(parse(s4, fuzzy=True), datetime(2003, 12, 3, 3))
  445. def testFuzzyIgnoreAMPM(self):
  446. s1 = "Jan 29, 1945 14:45 AM I going to see you there?"
  447. with pytest.warns(UnknownTimezoneWarning):
  448. res = parse(s1, fuzzy=True)
  449. self.assertEqual(res, datetime(1945, 1, 29, 14, 45))
  450. def testRandomFormat24(self):
  451. self.assertEqual(parse("0:00 PM, PST", default=self.default,
  452. ignoretz=True),
  453. datetime(2003, 9, 25, 12, 0))
  454. def testRandomFormat26(self):
  455. with pytest.warns(UnknownTimezoneWarning):
  456. res = parse("5:50 A.M. on June 13, 1990")
  457. self.assertEqual(res, datetime(1990, 6, 13, 5, 50))
  458. def testUnspecifiedDayFallback(self):
  459. # Test that for an unspecified day, the fallback behavior is correct.
  460. self.assertEqual(parse("April 2009", default=datetime(2010, 1, 31)),
  461. datetime(2009, 4, 30))
  462. def testUnspecifiedDayFallbackFebNoLeapYear(self):
  463. self.assertEqual(parse("Feb 2007", default=datetime(2010, 1, 31)),
  464. datetime(2007, 2, 28))
  465. def testUnspecifiedDayFallbackFebLeapYear(self):
  466. self.assertEqual(parse("Feb 2008", default=datetime(2010, 1, 31)),
  467. datetime(2008, 2, 29))
  468. def testErrorType01(self):
  469. with pytest.raises(ParserError):
  470. parse('shouldfail')
  471. def testCorrectErrorOnFuzzyWithTokens(self):
  472. assertRaisesRegex(self, ParserError, 'Unknown string format',
  473. parse, '04/04/32/423', fuzzy_with_tokens=True)
  474. assertRaisesRegex(self, ParserError, 'Unknown string format',
  475. parse, '04/04/04 +32423', fuzzy_with_tokens=True)
  476. assertRaisesRegex(self, ParserError, 'Unknown string format',
  477. parse, '04/04/0d4', fuzzy_with_tokens=True)
  478. def testIncreasingCTime(self):
  479. # This test will check 200 different years, every month, every day,
  480. # every hour, every minute, every second, and every weekday, using
  481. # a delta of more or less 1 year, 1 month, 1 day, 1 minute and
  482. # 1 second.
  483. delta = timedelta(days=365+31+1, seconds=1+60+60*60)
  484. dt = datetime(1900, 1, 1, 0, 0, 0, 0)
  485. for i in range(200):
  486. assert parse(dt.ctime()) == dt
  487. dt += delta
  488. def testIncreasingISOFormat(self):
  489. delta = timedelta(days=365+31+1, seconds=1+60+60*60)
  490. dt = datetime(1900, 1, 1, 0, 0, 0, 0)
  491. for i in range(200):
  492. assert parse(dt.isoformat()) == dt
  493. dt += delta
  494. def testMicrosecondsPrecisionError(self):
  495. # Skip found out that sad precision problem. :-(
  496. dt1 = parse("00:11:25.01")
  497. dt2 = parse("00:12:10.01")
  498. assert dt1.microsecond == 10000
  499. assert dt2.microsecond == 10000
  500. def testMicrosecondPrecisionErrorReturns(self):
  501. # One more precision issue, discovered by Eric Brown. This should
  502. # be the last one, as we're no longer using floating points.
  503. for ms in [100001, 100000, 99999, 99998,
  504. 10001, 10000, 9999, 9998,
  505. 1001, 1000, 999, 998,
  506. 101, 100, 99, 98]:
  507. dt = datetime(2008, 2, 27, 21, 26, 1, ms)
  508. assert parse(dt.isoformat()) == dt
  509. def testCustomParserInfo(self):
  510. # Custom parser info wasn't working, as Michael Elsdörfer discovered.
  511. from dateutil.parser import parserinfo, parser
  512. class myparserinfo(parserinfo):
  513. MONTHS = parserinfo.MONTHS[:]
  514. MONTHS[0] = ("Foo", "Foo")
  515. myparser = parser(myparserinfo())
  516. dt = myparser.parse("01/Foo/2007")
  517. assert dt == datetime(2007, 1, 1)
  518. def testCustomParserShortDaynames(self):
  519. # Horacio Hoyos discovered that day names shorter than 3 characters,
  520. # for example two letter German day name abbreviations, don't work:
  521. # https://github.com/dateutil/dateutil/issues/343
  522. from dateutil.parser import parserinfo, parser
  523. class GermanParserInfo(parserinfo):
  524. WEEKDAYS = [("Mo", "Montag"),
  525. ("Di", "Dienstag"),
  526. ("Mi", "Mittwoch"),
  527. ("Do", "Donnerstag"),
  528. ("Fr", "Freitag"),
  529. ("Sa", "Samstag"),
  530. ("So", "Sonntag")]
  531. myparser = parser(GermanParserInfo())
  532. dt = myparser.parse("Sa 21. Jan 2017")
  533. self.assertEqual(dt, datetime(2017, 1, 21))
  534. def testNoYearFirstNoDayFirst(self):
  535. dtstr = '090107'
  536. # Should be MMDDYY
  537. self.assertEqual(parse(dtstr),
  538. datetime(2007, 9, 1))
  539. self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=False),
  540. datetime(2007, 9, 1))
  541. def testYearFirst(self):
  542. dtstr = '090107'
  543. # Should be MMDDYY
  544. self.assertEqual(parse(dtstr, yearfirst=True),
  545. datetime(2009, 1, 7))
  546. self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=False),
  547. datetime(2009, 1, 7))
  548. def testDayFirst(self):
  549. dtstr = '090107'
  550. # Should be DDMMYY
  551. self.assertEqual(parse(dtstr, dayfirst=True),
  552. datetime(2007, 1, 9))
  553. self.assertEqual(parse(dtstr, yearfirst=False, dayfirst=True),
  554. datetime(2007, 1, 9))
  555. def testDayFirstYearFirst(self):
  556. dtstr = '090107'
  557. # Should be YYDDMM
  558. self.assertEqual(parse(dtstr, yearfirst=True, dayfirst=True),
  559. datetime(2009, 7, 1))
  560. def testUnambiguousYearFirst(self):
  561. dtstr = '2015 09 25'
  562. self.assertEqual(parse(dtstr, yearfirst=True),
  563. datetime(2015, 9, 25))
  564. def testUnambiguousDayFirst(self):
  565. dtstr = '2015 09 25'
  566. self.assertEqual(parse(dtstr, dayfirst=True),
  567. datetime(2015, 9, 25))
  568. def testUnambiguousDayFirstYearFirst(self):
  569. dtstr = '2015 09 25'
  570. self.assertEqual(parse(dtstr, dayfirst=True, yearfirst=True),
  571. datetime(2015, 9, 25))
  572. def test_mstridx(self):
  573. # See GH408
  574. dtstr = '2015-15-May'
  575. self.assertEqual(parse(dtstr),
  576. datetime(2015, 5, 15))
  577. def test_idx_check(self):
  578. dtstr = '2017-07-17 06:15:'
  579. # Pre-PR, the trailing colon will cause an IndexError at 824-825
  580. # when checking `i < len_l` and then accessing `l[i+1]`
  581. res = parse(dtstr, fuzzy=True)
  582. assert res == datetime(2017, 7, 17, 6, 15)
  583. def test_hmBY(self):
  584. # See GH#483
  585. dtstr = '02:17NOV2017'
  586. res = parse(dtstr, default=self.default)
  587. assert res == datetime(2017, 11, self.default.day, 2, 17)
  588. def test_validate_hour(self):
  589. # See GH353
  590. invalid = "201A-01-01T23:58:39.239769+03:00"
  591. with pytest.raises(ParserError):
  592. parse(invalid)
  593. def test_era_trailing_year(self):
  594. dstr = 'AD2001'
  595. res = parse(dstr)
  596. assert res.year == 2001, res
  597. def test_includes_timestr(self):
  598. timestr = "2020-13-97T44:61:83"
  599. try:
  600. parse(timestr)
  601. except ParserError as e:
  602. assert e.args[1] == timestr
  603. else:
  604. pytest.fail("Failed to raise ParserError")
  class TestOutOfBounds(object):
      # Every case hands the parser a string containing a field value that
      # cannot form a valid datetime and requires ParserError.
      # The `fuzzy` argument is supplied from outside this class
      # (presumably a pytest fixture; verify where it is defined).

      def test_no_year_zero(self):
          # Year 0000 is rejected.
          bad_input = "0000 Jun 20"
          with pytest.raises(ParserError):
              parse(bad_input)

      def test_out_of_bound_day(self):
          # February never has a 30th day.
          bad_input = "Feb 30, 2007"
          with pytest.raises(ParserError):
              parse(bad_input)

      def test_illegal_month_error(self):
          bad_input = "0-100"
          with pytest.raises(ParserError):
              parse(bad_input)

      def test_day_sanity(self, fuzzy):
          bad_input = "2014-15-25"
          with pytest.raises(ParserError):
              parse(bad_input, fuzzy=fuzzy)

      def test_minute_sanity(self, fuzzy):
          # Minute value 64 is out of range.
          bad_input = "2014-02-28 22:64"
          with pytest.raises(ParserError):
              parse(bad_input, fuzzy=fuzzy)

      def test_hour_sanity(self, fuzzy):
          # Hour value 25 is out of range.
          bad_input = "2014-02-28 25:16 PM"
          with pytest.raises(ParserError):
              parse(bad_input, fuzzy=fuzzy)

      def test_second_sanity(self, fuzzy):
          # Second value 64 is out of range.
          bad_input = "2014-02-28 22:14:64"
          with pytest.raises(ParserError):
              parse(bad_input, fuzzy=fuzzy)
  class TestParseUnimplementedCases(object):
      # Inputs the parser is not yet expected to handle. Every test is
      # marked xfail; each body must therefore be written so that it would
      # pass once the parser handles the input, and fail only because of
      # the parser behaviour being described.

      @pytest.mark.xfail
      def test_somewhat_ambiguous_string(self):
          # Ref: github issue #487
          # The parser is choosing the wrong part for hour
          # causing datetime to raise an exception.
          dtstr = '1237 PM BRST Mon Oct 30 2017'
          # Build the time zone locally: this class defines no `tzinfos`
          # attribute, and the keyword accepted by parse() is `tzinfos`
          # (a name -> tzinfo mapping), while datetime() needs an actual
          # tzinfo object.
          # NOTE(review): BRST taken as UTC-02:00; align with the offset
          # used for BRST elsewhere in this file if it differs.
          brst = tz.tzoffset('BRST', -7200)
          res = parse(dtstr, tzinfos={'BRST': brst})
          assert res == datetime(2017, 10, 30, 12, 37, tzinfo=brst)

      @pytest.mark.xfail
      def test_YmdH_M_S(self):
          # found in nasdaq's ftp data
          dstr = '1991041310:19:24'
          expected = datetime(1991, 4, 13, 10, 19, 24)
          res = parse(dstr)
          assert res == expected, (res, expected)

      @pytest.mark.xfail
      def test_first_century(self):
          dstr = '0031 Nov 03'
          expected = datetime(31, 11, 3)
          res = parse(dstr)
          assert res == expected, res

      @pytest.mark.xfail
      def test_era_trailing_year_with_dots(self):
          dstr = 'A.D.2001'
          res = parse(dstr)
          assert res.year == 2001, res

      @pytest.mark.xfail
      def test_ad_nospace(self):
          # The same date written with varying amounts of zero padding on
          # the year, each with "AD" attached directly to the digits.
          expected = datetime(6, 5, 19)
          for dstr in [' 6AD May 19', ' 06AD May 19',
                       ' 006AD May 19', ' 0006AD May 19']:
              res = parse(dstr)
              assert res == expected, (dstr, res)

      @pytest.mark.xfail
      def test_four_letter_day(self):
          dstr = 'Frid Dec 30, 2016'
          expected = datetime(2016, 12, 30)
          res = parse(dstr)
          assert res == expected

      @pytest.mark.xfail
      def test_non_date_number(self):
          dstr = '1,700'
          with pytest.raises(ParserError):
              parse(dstr)

      @pytest.mark.xfail
      def test_on_era(self):
          # This could be classified as an "eras" test, but the relevant part
          # about this is the ` on `
          dstr = '2:15 PM on January 2nd 1973 A.D.'
          expected = datetime(1973, 1, 2, 14, 15)
          res = parse(dstr)
          assert res == expected

      @pytest.mark.xfail
      def test_extraneous_year(self):
          # This was found in the wild at insidertrading.org
          dstr = "2011 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
          # fuzzy_with_tokens=True returns (datetime, skipped_tokens);
          # only the datetime is compared here.
          res, _ = parse(dstr, fuzzy_with_tokens=True)
          expected = datetime(2012, 11, 7)
          assert res == expected

      @pytest.mark.xfail
      def test_extraneous_year_tokens(self):
          # This was found in the wild at insidertrading.org
          # Unlike in the case above, identifying the first "2012" as the year
          # would not be a problem, but inferring that the latter 2012 is hhmm
          # is a problem.
          dstr = "2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d NOVEMBER 7, 2012"
          expected = datetime(2012, 11, 7)
          (res, tokens) = parse(dstr, fuzzy_with_tokens=True)
          assert res == expected
          assert tokens == ("2012 MARTIN CHILDREN'S IRREVOCABLE TRUST u/a/d ",)

      @pytest.mark.xfail
      def test_extraneous_year2(self):
          # This was found in the wild at insidertrading.org
          dstr = ("Berylson Amy Smith 1998 Grantor Retained Annuity Trust "
                  "u/d/t November 2, 1998 f/b/o Jennifer L Berylson")
          # fuzzy_with_tokens=True returns (datetime, skipped_tokens);
          # only the datetime is compared here.
          res, _ = parse(dstr, fuzzy_with_tokens=True)
          expected = datetime(1998, 11, 2)
          assert res == expected

      @pytest.mark.xfail
      def test_extraneous_year3(self):
          # This was found in the wild at insidertrading.org
          dstr = "SMITH R & WEISS D 94 CHILD TR FBO M W SMITH UDT 12/1/1994"
          # fuzzy_with_tokens=True returns (datetime, skipped_tokens);
          # only the datetime is compared here.
          res, _ = parse(dstr, fuzzy_with_tokens=True)
          expected = datetime(1994, 12, 1)
          assert res == expected

      @pytest.mark.xfail
      def test_unambiguous_YYYYMM(self):
          # 171206 can be parsed as YYMMDD. However, 201712 cannot be parsed
          # as instance of YYMMDD and parser could fallback to YYYYMM format.
          dstr = "201712"
          res = parse(dstr)
          expected = datetime(2017, 12, 1)
          assert res == expected

      @pytest.mark.xfail
      def test_extraneous_numerical_content(self):
          # ref: https://github.com/dateutil/dateutil/issues/1029
          # parser interprets price and percentage as parts of the date
          dstr = "£14.99 (25% off, until April 20)"
          res = parse(dstr, fuzzy=True, default=datetime(2000, 1, 1))
          expected = datetime(2000, 4, 20)
          assert res == expected
  @pytest.mark.skipif(IS_WIN, reason="Windows does not use TZ var")
  class TestTZVar(object):
      # Each test runs its body inside TZEnvContext with a POSIX-style TZ
      # string (presumably overriding the TZ environment variable for the
      # duration of the block; verify against TZEnvContext).

      def test_parse_unambiguous_nonexistent_local(self):
          # When dates are specified "EST" even when they should be "EDT" in the
          # local time zone, we should still assign the local time zone
          with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
              expected = datetime(2011, 8, 1, 12, 30, tzinfo=tz.tzlocal())
              parsed = parse('2011-08-01T12:30 EST')

              assert parsed.tzname() == 'EDT'
              assert parsed == expected

      def test_tzlocal_in_gmt(self):
          # GH #318
          with TZEnvContext('GMT0BST,M3.5.0,M10.5.0'):
              # This is an imaginary datetime in tz.tzlocal() but should still
              # parse using the GMT-as-alias-for-UTC rule
              parsed = parse('2004-05-01T12:00 GMT')
              expected = datetime(2004, 5, 1, 12, tzinfo=tz.UTC)

              assert parsed == expected

      def test_tzlocal_parse_fold(self):
          # One manifestation of GH #318
          with TZEnvContext('EST+5EDT,M3.2.0/2,M11.1.0/2'):
              naive_local = datetime(2011, 11, 6, 1, 30, tzinfo=tz.tzlocal())
              expected = tz.enfold(naive_local, fold=1)
              parsed = parse('2011-11-06T01:30 EST')

              # Because this is ambiguous, until `tz.tzlocal() is tz.tzlocal()`
              # we'll just check the attributes we care about rather than
              # parsed == expected
              assert parsed.tzname() == expected.tzname()
              assert parsed.replace(tzinfo=None) == expected.replace(tzinfo=None)
              assert parsed.fold == expected.fold
              assert parsed.astimezone(tz.UTC) == expected.astimezone(tz.UTC)
  def test_parse_tzinfos_fold():
      # Both abbreviations map to the same zone object; 01:30 on
      # 2011-11-06 labelled "EST" is expected to come back with fold=1.
      new_york = tz.gettz('America/New_York')
      abbreviations = {'EST': new_york, 'EDT': new_york}

      wall_time = datetime(2011, 11, 6, 1, 30, tzinfo=new_york)
      expected = tz.enfold(wall_time, fold=1)
      parsed = parse('2011-11-06T01:30 EST', tzinfos=abbreviations)

      assert parsed == expected
      # The very same tzinfo object must be attached, not an equal copy.
      assert parsed.tzinfo is expected.tzinfo
      assert parsed.fold == expected.fold
      assert parsed.astimezone(tz.UTC) == expected.astimezone(tz.UTC)
  @pytest.mark.parametrize('dtstr,dt', [
      ('5.6h', datetime(2003, 9, 25, 5, 36)),
      ('5.6m', datetime(2003, 9, 25, 0, 5, 36)),
      # '5.6s' never had a rounding problem, test added for completeness
      ('5.6s', datetime(2003, 9, 25, 0, 0, 5, 600000))
  ])
  def test_rounding_floatlike_strings(dtstr, dt):
      # Fractional hour/minute/second values must land exactly on the
      # expected datetime when combined with the fixed default date.
      base = datetime(2003, 9, 25)
      parsed = parse(dtstr, default=base)
      assert parsed == dt
  @pytest.mark.parametrize('value', ['1: test', 'Nan'])
  def test_decimal_error(value):
      # GH 632, GH 662 - decimal.Decimal raises some non-ParserError exception
      # when constructed with an invalid value
      # Whatever is raised internally, callers must only ever see ParserError.
      with pytest.raises(ParserError):
          parse(value)
  def test_parsererror_repr():
      # GH 991 — the __repr__ was not properly indented and so was never defined.
      # This tests the current behavior of the ParserError __repr__, but the
      # precise format is not guaranteed to be stable and may change even in
      # minor versions. This test exists to avoid regressions.
      error = ParserError("Problem with string: %s", "2019-01-01")
      expected = "ParserError('Problem with string: %s', '2019-01-01')"
      assert repr(error) == expected