test_url_query.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. from typing import List, Tuple
  2. from urllib.parse import parse_qs, urlencode
  3. import pytest
  4. from multidict import MultiDict, MultiDictProxy
  5. from yarl import URL
  6. # ========================================
  7. # Basic chars in query values
  8. # ========================================
  9. URLS_WITH_BASIC_QUERY_VALUES: List[Tuple[URL, MultiDict]] = [
  10. # Empty strings, keys and values
  11. (
  12. URL("http://example.com"),
  13. MultiDict(),
  14. ),
  15. (
  16. URL("http://example.com?a="),
  17. MultiDict([("a", "")]),
  18. ),
  19. # ASCII chars
  20. (
  21. URL("http://example.com?a+b=c+d"),
  22. MultiDict({"a b": "c d"}),
  23. ),
  24. (
  25. URL("http://example.com?a=1&b=2"),
  26. MultiDict([("a", "1"), ("b", "2")]),
  27. ),
  28. (
  29. URL("http://example.com?a=1&b=2&a=3"),
  30. MultiDict([("a", "1"), ("b", "2"), ("a", "3")]),
  31. ),
  32. # Non-ASCI BMP chars
  33. (
  34. URL("http://example.com?ключ=знач"),
  35. MultiDict({"ключ": "знач"}),
  36. ),
  37. (
  38. URL("http://example.com?foo=ᴜɴɪᴄᴏᴅᴇ"),
  39. MultiDict({"foo": "ᴜɴɪᴄᴏᴅᴇ"}),
  40. ),
  41. # Non-BMP chars
  42. (
  43. URL("http://example.com?bar=𝕦𝕟𝕚𝕔𝕠𝕕𝕖"),
  44. MultiDict({"bar": "𝕦𝕟𝕚𝕔𝕠𝕕𝕖"}),
  45. ),
  46. ]
  47. @pytest.mark.parametrize(
  48. "original_url, expected_query",
  49. URLS_WITH_BASIC_QUERY_VALUES,
  50. )
  51. def test_query_basic_parsing(original_url, expected_query):
  52. assert isinstance(original_url.query, MultiDictProxy)
  53. assert original_url.query == expected_query
  54. @pytest.mark.parametrize(
  55. "original_url, expected_query",
  56. URLS_WITH_BASIC_QUERY_VALUES,
  57. )
  58. def test_query_basic_update_query(original_url, expected_query):
  59. new_url = original_url.update_query({})
  60. assert new_url == original_url
  61. def test_query_dont_unqoute_twice():
  62. sample_url = "http://base.place?" + urlencode({"a": "/////"})
  63. query = urlencode({"url": sample_url})
  64. full_url = "http://test_url.aha?" + query
  65. url = URL(full_url)
  66. assert url.query["url"] == sample_url
  67. # ========================================
  68. # Reserved chars in query values
  69. # ========================================
  70. # See https://github.com/python/cpython#87133, which introduced a new
  71. # `separator` keyword argument to `urllib.parse.parse_qs` (among others).
  72. # If the name doesn't exist as a variable in the function bytecode, the
  73. # test is expected to fail.
  74. _SEMICOLON_XFAIL = pytest.mark.xfail(
  75. condition="separator" not in parse_qs.__code__.co_varnames,
  76. reason=(
  77. "Python versions < 3.7.10, < 3.8.8 and < 3.9.2 lack a fix for "
  78. 'CVE-2021-23336 dropping ";" as a valid query parameter separator, '
  79. "making this test fail."
  80. ),
  81. strict=True,
  82. )
  83. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES = [
  84. # Ampersand
  85. (URL("http://127.0.0.1/?a=10&b=20"), 2, "10"),
  86. (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
  87. (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
  88. # Semicolon, which is *not* a query parameter separator as of RFC3986
  89. (URL("http://127.0.0.1/?a=10;b=20"), 1, "10;b=20"),
  90. (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
  91. (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
  92. ]
  93. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL = [
  94. # Ampersand
  95. *URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[:3],
  96. # Semicolon, which is *not* a query parameter separator as of RFC3986
  97. # Mark the first of these as expecting to fail on old Python patch releases.
  98. pytest.param(*URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[3], marks=_SEMICOLON_XFAIL),
  99. *URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[4:],
  100. ]
  101. @pytest.mark.parametrize(
  102. "original_url, expected_query_len, expected_value_a",
  103. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
  104. )
  105. def test_query_separators_from_parsing(
  106. original_url,
  107. expected_query_len,
  108. expected_value_a,
  109. ):
  110. assert len(original_url.query) == expected_query_len
  111. assert original_url.query["a"] == expected_value_a
  112. @pytest.mark.parametrize(
  113. "original_url, expected_query_len, expected_value_a",
  114. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
  115. )
  116. def test_query_separators_from_update_query(
  117. original_url,
  118. expected_query_len,
  119. expected_value_a,
  120. ):
  121. new_url = original_url.update_query({"c": expected_value_a})
  122. assert new_url.query["a"] == expected_value_a
  123. assert new_url.query["c"] == expected_value_a
  124. @pytest.mark.parametrize(
  125. "original_url, expected_query_len, expected_value_a",
  126. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
  127. )
  128. def test_query_separators_from_with_query(
  129. original_url,
  130. expected_query_len,
  131. expected_value_a,
  132. ):
  133. new_url = original_url.with_query({"c": expected_value_a})
  134. assert new_url.query["c"] == expected_value_a
  135. @pytest.mark.parametrize(
  136. "original_url, expected_query_len, expected_value_a",
  137. URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
  138. )
  139. def test_query_from_empty_update_query(
  140. original_url,
  141. expected_query_len,
  142. expected_value_a,
  143. ):
  144. new_url = original_url.update_query({})
  145. assert new_url.query["a"] == original_url.query["a"]
  146. if "b" in original_url.query:
  147. assert new_url.query["b"] == original_url.query["b"]