test_quoting.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. import pytest
  2. from yarl._quoting import NO_EXTENSIONS
  3. from yarl._quoting_py import _Quoter as _PyQuoter
  4. from yarl._quoting_py import _Unquoter as _PyUnquoter
  5. if not NO_EXTENSIONS:
  6. from yarl._quoting_c import _Quoter as _CQuoter
  7. from yarl._quoting_c import _Unquoter as _CUnquoter
  8. @pytest.fixture(params=[_PyQuoter, _CQuoter], ids=["py_quoter", "c_quoter"])
  9. def quoter(request):
  10. return request.param
  11. @pytest.fixture(params=[_PyUnquoter, _CUnquoter], ids=["py_unquoter", "c_unquoter"])
  12. def unquoter(request):
  13. return request.param
  14. else:
  15. @pytest.fixture(params=[_PyQuoter], ids=["py_quoter"])
  16. def quoter(request):
  17. return request.param
  18. @pytest.fixture(params=[_PyUnquoter], ids=["py_unquoter"])
  19. def unquoter(request):
  20. return request.param
  21. def hexescape(char):
  22. """Escape char as RFC 2396 specifies"""
  23. hex_repr = hex(ord(char))[2:].upper()
  24. if len(hex_repr) == 1:
  25. hex_repr = "0%s" % hex_repr
  26. return "%" + hex_repr
  27. def test_quote_not_allowed_non_strict(quoter):
  28. assert quoter()("%HH") == "%25HH"
  29. def test_quote_unfinished_tail_percent_non_strict(quoter):
  30. assert quoter()("%") == "%25"
  31. def test_quote_unfinished_tail_digit_non_strict(quoter):
  32. assert quoter()("%2") == "%252"
  33. def test_quote_unfinished_tail_safe_non_strict(quoter):
  34. assert quoter()("%x") == "%25x"
  35. def test_quote_unfinished_tail_unsafe_non_strict(quoter):
  36. assert quoter()("%#") == "%25%23"
  37. def test_quote_unfinished_tail_non_ascii_non_strict(quoter):
  38. assert quoter()("%ß") == "%25%C3%9F"
  39. def test_quote_unfinished_tail_non_ascii2_non_strict(quoter):
  40. assert quoter()("%€") == "%25%E2%82%AC"
  41. def test_quote_unfinished_tail_non_ascii3_non_strict(quoter):
  42. assert quoter()("%🐍") == "%25%F0%9F%90%8D"
  43. def test_quote_from_bytes(quoter):
  44. assert quoter()("archaeological arcana") == "archaeological%20arcana"
  45. assert quoter()("") == ""
  46. def test_quote_ignore_broken_unicode(quoter):
  47. s = quoter()(
  48. "j\u001a\udcf4q\udcda/\udc97g\udcee\udccb\u000ch\udccb"
  49. "\u0018\udce4v\u001b\udce2\udcce\udccecom/y\udccepj\u0016"
  50. )
  51. assert s == "j%1Aq%2Fg%0Ch%18v%1Bcom%2Fypj%16"
  52. assert quoter()(s) == s
  53. def test_unquote_to_bytes(unquoter):
  54. assert unquoter()("abc%20def") == "abc def"
  55. assert unquoter()("") == ""
  56. def test_never_quote(quoter):
  57. # Make sure quote() does not quote letters, digits, and "_,.-~"
  58. do_not_quote = (
  59. "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "_.-~"
  60. )
  61. assert quoter()(do_not_quote) == do_not_quote
  62. assert quoter(qs=True)(do_not_quote) == do_not_quote
  63. def test_safe(quoter):
  64. # Test setting 'safe' parameter does what it should do
  65. quote_by_default = "<>"
  66. assert quoter(safe=quote_by_default)(quote_by_default) == quote_by_default
  67. ret = quoter(safe=quote_by_default, qs=True)(quote_by_default)
  68. assert ret == quote_by_default
  69. _SHOULD_QUOTE = [chr(num) for num in range(32)]
  70. _SHOULD_QUOTE.append(r'<>#"{}|\^[]`')
  71. _SHOULD_QUOTE.append(chr(127)) # For 0x7F
  72. SHOULD_QUOTE = "".join(_SHOULD_QUOTE)
  73. @pytest.mark.parametrize("char", SHOULD_QUOTE)
  74. def test_default_quoting(char, quoter):
  75. # Make sure all characters that should be quoted are by default sans
  76. # space (separate test for that).
  77. result = quoter()(char)
  78. assert hexescape(char) == result
  79. result = quoter(qs=True)(char)
  80. assert hexescape(char) == result
  81. # TODO: should it encode percent?
  82. def test_default_quoting_percent(quoter):
  83. result = quoter()("%25")
  84. assert "%25" == result
  85. result = quoter(qs=True)("%25")
  86. assert "%25" == result
  87. result = quoter(requote=False)("%25")
  88. assert "%2525" == result
  89. def test_default_quoting_partial(quoter):
  90. partial_quote = "ab[]cd"
  91. expected = "ab%5B%5Dcd"
  92. result = quoter()(partial_quote)
  93. assert expected == result
  94. result = quoter(qs=True)(partial_quote)
  95. assert expected == result
  96. def test_quoting_space(quoter):
  97. # Make sure quote() and quote_plus() handle spaces as specified in
  98. # their unique way
  99. result = quoter()(" ")
  100. assert result == hexescape(" ")
  101. result = quoter(qs=True)(" ")
  102. assert result == "+"
  103. given = "a b cd e f"
  104. expect = given.replace(" ", hexescape(" "))
  105. result = quoter()(given)
  106. assert expect == result
  107. expect = given.replace(" ", "+")
  108. result = quoter(qs=True)(given)
  109. assert expect == result
  110. def test_quoting_plus(quoter):
  111. assert quoter(qs=False)("alpha+beta gamma") == "alpha+beta%20gamma"
  112. assert quoter(qs=True)("alpha+beta gamma") == "alpha%2Bbeta+gamma"
  113. assert quoter(safe="+", qs=True)("alpha+beta gamma") == "alpha+beta+gamma"
  114. def test_quote_with_unicode(quoter):
  115. # Characters in Latin-1 range, encoded by default in UTF-8
  116. given = "\u00a2\u00d8ab\u00ff"
  117. expect = "%C2%A2%C3%98ab%C3%BF"
  118. result = quoter()(given)
  119. assert expect == result
  120. # Characters in BMP, encoded by default in UTF-8
  121. given = "\u6f22\u5b57" # "Kanji"
  122. expect = "%E6%BC%A2%E5%AD%97"
  123. result = quoter()(given)
  124. assert expect == result
  125. def test_quote_plus_with_unicode(quoter):
  126. # Characters in Latin-1 range, encoded by default in UTF-8
  127. given = "\u00a2\u00d8ab\u00ff"
  128. expect = "%C2%A2%C3%98ab%C3%BF"
  129. result = quoter(qs=True)(given)
  130. assert expect == result
  131. # Characters in BMP, encoded by default in UTF-8
  132. given = "\u6f22\u5b57" # "Kanji"
  133. expect = "%E6%BC%A2%E5%AD%97"
  134. result = quoter(qs=True)(given)
  135. assert expect == result
  136. @pytest.mark.parametrize("num", list(range(128)))
  137. def test_unquoting(num, unquoter):
  138. # Make sure unquoting of all ASCII values works
  139. given = hexescape(chr(num))
  140. expect = chr(num)
  141. result = unquoter()(given)
  142. assert expect == result
  143. if expect not in "+=&;":
  144. result = unquoter(qs=True)(given)
  145. assert expect == result
  146. # Expected value should be the same as given.
  147. # See https://url.spec.whatwg.org/#percent-encoded-bytes
  148. @pytest.mark.parametrize(
  149. ("input", "expected"),
  150. [
  151. ("%", "%"),
  152. ("%2", "%2"),
  153. ("%x", "%x"),
  154. ("%€", "%€"),
  155. ("%2x", "%2x"),
  156. ("%2 ", "%2 "),
  157. ("% 2", "% 2"),
  158. ("%xa", "%xa"),
  159. ("%%", "%%"),
  160. ("%%3f", "%?"),
  161. ("%2%", "%2%"),
  162. ("%2%3f", "%2?"),
  163. ("%x%3f", "%x?"),
  164. ("%€%3f", "%€?"),
  165. ],
  166. )
  167. def test_unquoting_bad_percent_escapes(unquoter, input, expected):
  168. assert unquoter()(input) == expected
# NOTE(review): the body asserts that ValueError is raised while the test is
# marked xfail — presumably no ValueError is raised today; confirm against
# the pull requests listed in the reason text.
@pytest.mark.xfail(
    reason="""
FIXME: After conversion to bytes, should not cause UTF-8 decode fail.
See https://url.spec.whatwg.org/#percent-encoded-bytes
Refs:
* https://github.com/aio-libs/yarl/pull/216
* https://github.com/aio-libs/yarl/pull/214
* https://github.com/aio-libs/yarl/pull/7
""",
)
@pytest.mark.parametrize("urlencoded_string", ("%AB", "%AB%AB"))
def test_unquoting_invalid_utf8_sequence(unquoter, urlencoded_string):
    """Assert that unquoting *urlencoded_string* raises ValueError (xfail)."""
    with pytest.raises(ValueError):
        unquoter()(urlencoded_string)
  183. def test_unquoting_mixed_case_percent_escapes(unquoter):
  184. expected = "𝕦"
  185. assert expected == unquoter()("%F0%9D%95%A6")
  186. assert expected == unquoter()("%F0%9d%95%a6")
  187. assert expected == unquoter()("%f0%9D%95%a6")
  188. assert expected == unquoter()("%f0%9d%95%a6")
  189. def test_unquoting_parts(unquoter):
  190. # Make sure unquoting works when have non-quoted characters
  191. # interspersed
  192. given = "ab" + hexescape("c") + "d"
  193. expect = "abcd"
  194. result = unquoter()(given)
  195. assert expect == result
  196. result = unquoter(qs=True)(given)
  197. assert expect == result
  198. def test_quote_None(quoter):
  199. assert quoter()(None) is None
  200. def test_unquote_None(unquoter):
  201. assert unquoter()(None) is None
  202. def test_quote_empty_string(quoter):
  203. assert quoter()("") == ""
  204. def test_unquote_empty_string(unquoter):
  205. assert unquoter()("") == ""
  206. def test_quote_bad_types(quoter):
  207. with pytest.raises(TypeError):
  208. quoter()(123)
  209. def test_unquote_bad_types(unquoter):
  210. with pytest.raises(TypeError):
  211. unquoter()(123)
  212. def test_quote_lowercase(quoter):
  213. assert quoter()("%d1%84") == "%D1%84"
  214. def test_quote_unquoted(quoter):
  215. assert quoter()("%41") == "A"
  216. def test_quote_space(quoter):
  217. assert quoter()(" ") == "%20" # NULL
  218. # test to see if this would work to fix
  219. # coverage on this file.
  220. def test_quote_percent_last_character(quoter):
  221. # % is last character in this case.
  222. assert quoter()("%") == "%25"
  223. def test_unquote_unsafe(unquoter):
  224. assert unquoter(unsafe="@")("%40") == "%40"
  225. def test_unquote_unsafe2(unquoter):
  226. assert unquoter(unsafe="@")("%40abc") == "%40abc"
  227. def test_unquote_unsafe3(unquoter):
  228. assert unquoter(qs=True)("a%2Bb=?%3D%2B%26") == "a%2Bb=?%3D%2B%26"
  229. def test_unquote_unsafe4(unquoter):
  230. assert unquoter(unsafe="@")("a@b") == "a%40b"
  231. @pytest.mark.parametrize(
  232. ("input", "expected"),
  233. [
  234. ("%e2%82", "%e2%82"),
  235. ("%e2%82ac", "%e2%82ac"),
  236. ("%e2%82%f8", "%e2%82%f8"),
  237. ("%e2%82%2b", "%e2%82+"),
  238. ("%e2%82%e2%82%ac", "%e2%82€"),
  239. ("%e2%82%e2%82", "%e2%82%e2%82"),
  240. ],
  241. )
  242. def test_unquote_non_utf8(unquoter, input, expected):
  243. assert unquoter()(input) == expected
  244. def test_unquote_unsafe_non_utf8(unquoter):
  245. assert unquoter(unsafe="\n")("%e2%82%0a") == "%e2%82%0A"
  246. def test_unquote_plus_non_utf8(unquoter):
  247. assert unquoter(qs=True)("%e2%82%2b") == "%e2%82%2B"
  248. def test_quote_non_ascii(quoter):
  249. assert quoter()("%F8") == "%F8"
  250. def test_quote_non_ascii2(quoter):
  251. assert quoter()("a%F8b") == "a%F8b"
  252. def test_quote_percent_percent_encoded(quoter):
  253. assert quoter()("%%3f") == "%25%3F"
  254. def test_quote_percent_digit_percent_encoded(quoter):
  255. assert quoter()("%2%3f") == "%252%3F"
  256. def test_quote_percent_safe_percent_encoded(quoter):
  257. assert quoter()("%x%3f") == "%25x%3F"
  258. def test_quote_percent_unsafe_percent_encoded(quoter):
  259. assert quoter()("%#%3f") == "%25%23%3F"
  260. def test_quote_percent_non_ascii_percent_encoded(quoter):
  261. assert quoter()("%ß%3f") == "%25%C3%9F%3F"
  262. def test_quote_percent_non_ascii2_percent_encoded(quoter):
  263. assert quoter()("%€%3f") == "%25%E2%82%AC%3F"
  264. def test_quote_percent_non_ascii3_percent_encoded(quoter):
  265. assert quoter()("%🐍%3f") == "%25%F0%9F%90%8D%3F"
  266. class StrLike(str):
  267. pass
  268. def test_quote_str_like(quoter):
  269. assert quoter()(StrLike("abc")) == "abc"
  270. def test_unquote_str_like(unquoter):
  271. assert unquoter()(StrLike("abc")) == "abc"
  272. def test_quote_sub_delims(quoter):
  273. assert quoter()("!$&'()*+,;=") == "!$&'()*+,;="
  274. def test_requote_sub_delims(quoter):
  275. assert quoter()("%21%24%26%27%28%29%2A%2B%2C%3B%3D") == "!$&'()*+,;="
  276. def test_unquoting_plus(unquoter):
  277. assert unquoter(qs=False)("a+b") == "a+b"
  278. def test_unquote_plus_to_space(unquoter):
  279. assert unquoter(qs=True)("a+b") == "a b"
  280. def test_unquote_plus_to_space_unsafe(unquoter):
  281. assert unquoter(unsafe="+", qs=True)("a+b") == "a+b"
  282. def test_quote_qs_with_colon(quoter):
  283. s = quoter(safe="=+&?/:@", qs=True)("next=http%3A//example.com/")
  284. assert s == "next=http://example.com/"
  285. def test_quote_protected(quoter):
  286. s = quoter(protected="/")("/path%2fto/three")
  287. assert s == "/path%2Fto/three"
  288. def test_quote_fastpath_safe(quoter):
  289. s1 = "/path/to"
  290. s2 = quoter(safe="/")(s1)
  291. assert s1 is s2
  292. def test_quote_fastpath_pct(quoter):
  293. s1 = "abc%A0"
  294. s2 = quoter()(s1)
  295. assert s1 is s2
  296. def test_quote_very_large_string(quoter):
  297. # more than 8 KiB
  298. s = "abcфух%30%0a" * 1024
  299. assert quoter()(s) == "abc%D1%84%D1%83%D1%850%0A" * 1024
  300. def test_space(quoter):
  301. s = "% A"
  302. assert quoter()(s) == "%25%20A"
  303. def test_quoter_path_with_plus(quoter):
  304. s = "/test/x+y%2Bz/:+%2B/"
  305. assert "/test/x+y%2Bz/:+%2B/" == quoter(safe="@:", protected="/+")(s)
  306. def test_unquoter_path_with_plus(unquoter):
  307. s = "/test/x+y%2Bz/:+%2B/"
  308. assert "/test/x+y+z/:++/" == unquoter(unsafe="+")(s)