_url.py 37 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200
  1. import functools
  2. import math
  3. import warnings
  4. from collections.abc import Mapping, Sequence
  5. from contextlib import suppress
  6. from ipaddress import ip_address
  7. from urllib.parse import SplitResult, parse_qsl, quote, urljoin, urlsplit, urlunsplit
  8. import idna
  9. from multidict import MultiDict, MultiDictProxy
  10. from ._quoting import _Quoter, _Unquoter
  11. DEFAULT_PORTS = {"http": 80, "https": 443, "ws": 80, "wss": 443}
  12. sentinel = object()
  13. def rewrite_module(obj: object) -> object:
  14. obj.__module__ = "yarl"
  15. return obj
  16. class cached_property:
  17. """Use as a class method decorator. It operates almost exactly like
  18. the Python `@property` decorator, but it puts the result of the
  19. method it decorates into the instance dict after the first call,
  20. effectively replacing the function it decorates with an instance
  21. variable. It is, in Python parlance, a data descriptor.
  22. """
  23. def __init__(self, wrapped):
  24. self.wrapped = wrapped
  25. try:
  26. self.__doc__ = wrapped.__doc__
  27. except AttributeError: # pragma: no cover
  28. self.__doc__ = ""
  29. self.name = wrapped.__name__
  30. def __get__(self, inst, owner, _sentinel=sentinel):
  31. if inst is None:
  32. return self
  33. val = inst._cache.get(self.name, _sentinel)
  34. if val is not _sentinel:
  35. return val
  36. val = self.wrapped(inst)
  37. inst._cache[self.name] = val
  38. return val
  39. def __set__(self, inst, value):
  40. raise AttributeError("cached property is read-only")
  41. def _normalize_path_segments(segments):
  42. """Drop '.' and '..' from a sequence of str segments"""
  43. resolved_path = []
  44. for seg in segments:
  45. if seg == "..":
  46. # ignore any .. segments that would otherwise cause an
  47. # IndexError when popped from resolved_path if
  48. # resolving for rfc3986
  49. with suppress(IndexError):
  50. resolved_path.pop()
  51. elif seg != ".":
  52. resolved_path.append(seg)
  53. if segments and segments[-1] in (".", ".."):
  54. # do some post-processing here.
  55. # if the last segment was a relative dir,
  56. # then we need to append the trailing '/'
  57. resolved_path.append("")
  58. return resolved_path
  59. @rewrite_module
  60. class URL:
  61. # Don't derive from str
  62. # follow pathlib.Path design
  63. # probably URL will not suffer from pathlib problems:
  64. # it's intended for libraries like aiohttp,
  65. # not to be passed into standard library functions like os.open etc.
  66. # URL grammar (RFC 3986)
  67. # pct-encoded = "%" HEXDIG HEXDIG
  68. # reserved = gen-delims / sub-delims
  69. # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
  70. # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
  71. # / "*" / "+" / "," / ";" / "="
  72. # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
  73. # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  74. # hier-part = "//" authority path-abempty
  75. # / path-absolute
  76. # / path-rootless
  77. # / path-empty
  78. # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  79. # authority = [ userinfo "@" ] host [ ":" port ]
  80. # userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
  81. # host = IP-literal / IPv4address / reg-name
  82. # IP-literal = "[" ( IPv6address / IPvFuture ) "]"
  83. # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
  84. # IPv6address = 6( h16 ":" ) ls32
  85. # / "::" 5( h16 ":" ) ls32
  86. # / [ h16 ] "::" 4( h16 ":" ) ls32
  87. # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
  88. # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
  89. # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
  90. # / [ *4( h16 ":" ) h16 ] "::" ls32
  91. # / [ *5( h16 ":" ) h16 ] "::" h16
  92. # / [ *6( h16 ":" ) h16 ] "::"
  93. # ls32 = ( h16 ":" h16 ) / IPv4address
  94. # ; least-significant 32 bits of address
  95. # h16 = 1*4HEXDIG
  96. # ; 16 bits of address represented in hexadecimal
  97. # IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
  98. # dec-octet = DIGIT ; 0-9
  99. # / %x31-39 DIGIT ; 10-99
  100. # / "1" 2DIGIT ; 100-199
  101. # / "2" %x30-34 DIGIT ; 200-249
  102. # / "25" %x30-35 ; 250-255
  103. # reg-name = *( unreserved / pct-encoded / sub-delims )
  104. # port = *DIGIT
  105. # path = path-abempty ; begins with "/" or is empty
  106. # / path-absolute ; begins with "/" but not "//"
  107. # / path-noscheme ; begins with a non-colon segment
  108. # / path-rootless ; begins with a segment
  109. # / path-empty ; zero characters
  110. # path-abempty = *( "/" segment )
  111. # path-absolute = "/" [ segment-nz *( "/" segment ) ]
  112. # path-noscheme = segment-nz-nc *( "/" segment )
  113. # path-rootless = segment-nz *( "/" segment )
  114. # path-empty = 0<pchar>
  115. # segment = *pchar
  116. # segment-nz = 1*pchar
  117. # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
  118. # ; non-zero-length segment without any colon ":"
  119. # pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
  120. # query = *( pchar / "/" / "?" )
  121. # fragment = *( pchar / "/" / "?" )
  122. # URI-reference = URI / relative-ref
  123. # relative-ref = relative-part [ "?" query ] [ "#" fragment ]
  124. # relative-part = "//" authority path-abempty
  125. # / path-absolute
  126. # / path-noscheme
  127. # / path-empty
  128. # absolute-URI = scheme ":" hier-part [ "?" query ]
  129. __slots__ = ("_cache", "_val")
  130. _QUOTER = _Quoter(requote=False)
  131. _REQUOTER = _Quoter()
  132. _PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False)
  133. _PATH_REQUOTER = _Quoter(safe="@:", protected="/+")
  134. _QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False)
  135. _QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True)
  136. _QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False)
  137. _FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False)
  138. _FRAGMENT_REQUOTER = _Quoter(safe="?/:@")
  139. _UNQUOTER = _Unquoter()
  140. _PATH_UNQUOTER = _Unquoter(unsafe="+")
  141. _QS_UNQUOTER = _Unquoter(qs=True)
  142. def __new__(cls, val="", *, encoded=False, strict=None):
  143. if strict is not None: # pragma: no cover
  144. warnings.warn("strict parameter is ignored")
  145. if type(val) is cls:
  146. return val
  147. if type(val) is str:
  148. val = urlsplit(val)
  149. elif type(val) is SplitResult:
  150. if not encoded:
  151. raise ValueError("Cannot apply decoding to SplitResult")
  152. elif isinstance(val, str):
  153. val = urlsplit(str(val))
  154. else:
  155. raise TypeError("Constructor parameter should be str")
  156. if not encoded:
  157. if not val[1]: # netloc
  158. netloc = ""
  159. host = ""
  160. else:
  161. host = val.hostname
  162. if host is None:
  163. raise ValueError("Invalid URL: host is required for absolute urls")
  164. try:
  165. port = val.port
  166. except ValueError as e:
  167. raise ValueError(
  168. "Invalid URL: port can't be converted to integer"
  169. ) from e
  170. netloc = cls._make_netloc(
  171. val.username, val.password, host, port, encode=True, requote=True
  172. )
  173. path = cls._PATH_REQUOTER(val[2])
  174. if netloc:
  175. path = cls._normalize_path(path)
  176. cls._validate_authority_uri_abs_path(host=host, path=path)
  177. query = cls._QUERY_REQUOTER(val[3])
  178. fragment = cls._FRAGMENT_REQUOTER(val[4])
  179. val = SplitResult(val[0], netloc, path, query, fragment)
  180. self = object.__new__(cls)
  181. self._val = val
  182. self._cache = {}
  183. return self
  184. @classmethod
  185. def build(
  186. cls,
  187. *,
  188. scheme="",
  189. authority="",
  190. user=None,
  191. password=None,
  192. host="",
  193. port=None,
  194. path="",
  195. query=None,
  196. query_string="",
  197. fragment="",
  198. encoded=False,
  199. ):
  200. """Creates and returns a new URL"""
  201. if authority and (user or password or host or port):
  202. raise ValueError(
  203. 'Can\'t mix "authority" with "user", "password", "host" or "port".'
  204. )
  205. if not isinstance(port, (int, type(None))):
  206. raise TypeError("The port is required to be int.")
  207. if port and not host:
  208. raise ValueError('Can\'t build URL with "port" but without "host".')
  209. if query and query_string:
  210. raise ValueError('Only one of "query" or "query_string" should be passed')
  211. if (
  212. scheme is None
  213. or authority is None
  214. or host is None
  215. or path is None
  216. or query_string is None
  217. or fragment is None
  218. ):
  219. raise TypeError(
  220. 'NoneType is illegal for "scheme", "authority", "host", "path", '
  221. '"query_string", and "fragment" args, use empty string instead.'
  222. )
  223. if authority:
  224. if encoded:
  225. netloc = authority
  226. else:
  227. tmp = SplitResult("", authority, "", "", "")
  228. netloc = cls._make_netloc(
  229. tmp.username, tmp.password, tmp.hostname, tmp.port, encode=True
  230. )
  231. elif not user and not password and not host and not port:
  232. netloc = ""
  233. else:
  234. netloc = cls._make_netloc(
  235. user, password, host, port, encode=not encoded, encode_host=not encoded
  236. )
  237. if not encoded:
  238. path = cls._PATH_QUOTER(path)
  239. if netloc:
  240. path = cls._normalize_path(path)
  241. cls._validate_authority_uri_abs_path(host=host, path=path)
  242. query_string = cls._QUERY_QUOTER(query_string)
  243. fragment = cls._FRAGMENT_QUOTER(fragment)
  244. url = cls(
  245. SplitResult(scheme, netloc, path, query_string, fragment), encoded=True
  246. )
  247. if query:
  248. return url.with_query(query)
  249. else:
  250. return url
  251. def __init_subclass__(cls):
  252. raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden")
  253. def __str__(self):
  254. val = self._val
  255. if not val.path and self.is_absolute() and (val.query or val.fragment):
  256. val = val._replace(path="/")
  257. return urlunsplit(val)
  258. def __repr__(self):
  259. return f"{self.__class__.__name__}('{str(self)}')"
  260. def __bytes__(self):
  261. return str(self).encode("ascii")
  262. def __eq__(self, other):
  263. if not type(other) is URL:
  264. return NotImplemented
  265. val1 = self._val
  266. if not val1.path and self.is_absolute():
  267. val1 = val1._replace(path="/")
  268. val2 = other._val
  269. if not val2.path and other.is_absolute():
  270. val2 = val2._replace(path="/")
  271. return val1 == val2
  272. def __hash__(self):
  273. ret = self._cache.get("hash")
  274. if ret is None:
  275. val = self._val
  276. if not val.path and self.is_absolute():
  277. val = val._replace(path="/")
  278. ret = self._cache["hash"] = hash(val)
  279. return ret
  280. def __le__(self, other):
  281. if not type(other) is URL:
  282. return NotImplemented
  283. return self._val <= other._val
  284. def __lt__(self, other):
  285. if not type(other) is URL:
  286. return NotImplemented
  287. return self._val < other._val
  288. def __ge__(self, other):
  289. if not type(other) is URL:
  290. return NotImplemented
  291. return self._val >= other._val
  292. def __gt__(self, other):
  293. if not type(other) is URL:
  294. return NotImplemented
  295. return self._val > other._val
  296. def __truediv__(self, name):
  297. if not isinstance(name, str):
  298. return NotImplemented
  299. return self._make_child((str(name),))
  300. def __mod__(self, query):
  301. return self.update_query(query)
  302. def __bool__(self) -> bool:
  303. return bool(
  304. self._val.netloc or self._val.path or self._val.query or self._val.fragment
  305. )
  306. def __getstate__(self):
  307. return (self._val,)
  308. def __setstate__(self, state):
  309. if state[0] is None and isinstance(state[1], dict):
  310. # default style pickle
  311. self._val = state[1]["_val"]
  312. else:
  313. self._val, *unused = state
  314. self._cache = {}
  315. def is_absolute(self):
  316. """A check for absolute URLs.
  317. Return True for absolute ones (having scheme or starting
  318. with //), False otherwise.
  319. """
  320. return self.raw_host is not None
  321. def is_default_port(self):
  322. """A check for default port.
  323. Return True if port is default for specified scheme,
  324. e.g. 'http://python.org' or 'http://python.org:80', False
  325. otherwise.
  326. """
  327. if self.port is None:
  328. return False
  329. default = DEFAULT_PORTS.get(self.scheme)
  330. if default is None:
  331. return False
  332. return self.port == default
  333. def origin(self):
  334. """Return an URL with scheme, host and port parts only.
  335. user, password, path, query and fragment are removed.
  336. """
  337. # TODO: add a keyword-only option for keeping user/pass maybe?
  338. if not self.is_absolute():
  339. raise ValueError("URL should be absolute")
  340. if not self._val.scheme:
  341. raise ValueError("URL should have scheme")
  342. v = self._val
  343. netloc = self._make_netloc(None, None, v.hostname, v.port)
  344. val = v._replace(netloc=netloc, path="", query="", fragment="")
  345. return URL(val, encoded=True)
  346. def relative(self):
  347. """Return a relative part of the URL.
  348. scheme, user, password, host and port are removed.
  349. """
  350. if not self.is_absolute():
  351. raise ValueError("URL should be absolute")
  352. val = self._val._replace(scheme="", netloc="")
  353. return URL(val, encoded=True)
  354. @property
  355. def scheme(self):
  356. """Scheme for absolute URLs.
  357. Empty string for relative URLs or URLs starting with //
  358. """
  359. return self._val.scheme
  360. @property
  361. def raw_authority(self):
  362. """Encoded authority part of URL.
  363. Empty string for relative URLs.
  364. """
  365. return self._val.netloc
  366. @cached_property
  367. def authority(self):
  368. """Decoded authority part of URL.
  369. Empty string for relative URLs.
  370. """
  371. return self._make_netloc(
  372. self.user, self.password, self.host, self.port, encode_host=False
  373. )
  374. @property
  375. def raw_user(self):
  376. """Encoded user part of URL.
  377. None if user is missing.
  378. """
  379. # not .username
  380. ret = self._val.username
  381. if not ret:
  382. return None
  383. return ret
  384. @cached_property
  385. def user(self):
  386. """Decoded user part of URL.
  387. None if user is missing.
  388. """
  389. return self._UNQUOTER(self.raw_user)
  390. @property
  391. def raw_password(self):
  392. """Encoded password part of URL.
  393. None if password is missing.
  394. """
  395. return self._val.password
  396. @cached_property
  397. def password(self):
  398. """Decoded password part of URL.
  399. None if password is missing.
  400. """
  401. return self._UNQUOTER(self.raw_password)
  402. @property
  403. def raw_host(self):
  404. """Encoded host part of URL.
  405. None for relative URLs.
  406. """
  407. # Use host instead of hostname for sake of shortness
  408. # May add .hostname prop later
  409. return self._val.hostname
  410. @cached_property
  411. def host(self):
  412. """Decoded host part of URL.
  413. None for relative URLs.
  414. """
  415. raw = self.raw_host
  416. if raw is None:
  417. return None
  418. if "%" in raw:
  419. # Hack for scoped IPv6 addresses like
  420. # fe80::2%Перевірка
  421. # presence of '%' sign means only IPv6 address, so idna is useless.
  422. return raw
  423. return _idna_decode(raw)
  424. @property
  425. def port(self):
  426. """Port part of URL, with scheme-based fallback.
  427. None for relative URLs or URLs without explicit port and
  428. scheme without default port substitution.
  429. """
  430. return self._val.port or DEFAULT_PORTS.get(self._val.scheme)
  431. @property
  432. def explicit_port(self):
  433. """Port part of URL, without scheme-based fallback.
  434. None for relative URLs or URLs without explicit port.
  435. """
  436. return self._val.port
  437. @property
  438. def raw_path(self):
  439. """Encoded path of URL.
  440. / for absolute URLs without path part.
  441. """
  442. ret = self._val.path
  443. if not ret and self.is_absolute():
  444. ret = "/"
  445. return ret
  446. @cached_property
  447. def path(self):
  448. """Decoded path of URL.
  449. / for absolute URLs without path part.
  450. """
  451. return self._PATH_UNQUOTER(self.raw_path)
  452. @cached_property
  453. def query(self):
  454. """A MultiDictProxy representing parsed query parameters in decoded
  455. representation.
  456. Empty value if URL has no query part.
  457. """
  458. ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True))
  459. return MultiDictProxy(ret)
  460. @property
  461. def raw_query_string(self):
  462. """Encoded query part of URL.
  463. Empty string if query is missing.
  464. """
  465. return self._val.query
  466. @cached_property
  467. def query_string(self):
  468. """Decoded query part of URL.
  469. Empty string if query is missing.
  470. """
  471. return self._QS_UNQUOTER(self.raw_query_string)
  472. @cached_property
  473. def path_qs(self):
  474. """Decoded path of URL with query."""
  475. if not self.query_string:
  476. return self.path
  477. return f"{self.path}?{self.query_string}"
  478. @cached_property
  479. def raw_path_qs(self):
  480. """Encoded path of URL with query."""
  481. if not self.raw_query_string:
  482. return self.raw_path
  483. return f"{self.raw_path}?{self.raw_query_string}"
  484. @property
  485. def raw_fragment(self):
  486. """Encoded fragment part of URL.
  487. Empty string if fragment is missing.
  488. """
  489. return self._val.fragment
  490. @cached_property
  491. def fragment(self):
  492. """Decoded fragment part of URL.
  493. Empty string if fragment is missing.
  494. """
  495. return self._UNQUOTER(self.raw_fragment)
  496. @cached_property
  497. def raw_parts(self):
  498. """A tuple containing encoded *path* parts.
  499. ('/',) for absolute URLs if *path* is missing.
  500. """
  501. path = self._val.path
  502. if self.is_absolute():
  503. if not path:
  504. parts = ["/"]
  505. else:
  506. parts = ["/"] + path[1:].split("/")
  507. else:
  508. if path.startswith("/"):
  509. parts = ["/"] + path[1:].split("/")
  510. else:
  511. parts = path.split("/")
  512. return tuple(parts)
  513. @cached_property
  514. def parts(self):
  515. """A tuple containing decoded *path* parts.
  516. ('/',) for absolute URLs if *path* is missing.
  517. """
  518. return tuple(self._UNQUOTER(part) for part in self.raw_parts)
  519. @cached_property
  520. def parent(self):
  521. """A new URL with last part of path removed and cleaned up query and
  522. fragment.
  523. """
  524. path = self.raw_path
  525. if not path or path == "/":
  526. if self.raw_fragment or self.raw_query_string:
  527. return URL(self._val._replace(query="", fragment=""), encoded=True)
  528. return self
  529. parts = path.split("/")
  530. val = self._val._replace(path="/".join(parts[:-1]), query="", fragment="")
  531. return URL(val, encoded=True)
  532. @cached_property
  533. def raw_name(self):
  534. """The last part of raw_parts."""
  535. parts = self.raw_parts
  536. if self.is_absolute():
  537. parts = parts[1:]
  538. if not parts:
  539. return ""
  540. else:
  541. return parts[-1]
  542. else:
  543. return parts[-1]
  544. @cached_property
  545. def name(self):
  546. """The last part of parts."""
  547. return self._UNQUOTER(self.raw_name)
  548. @cached_property
  549. def raw_suffix(self):
  550. name = self.raw_name
  551. i = name.rfind(".")
  552. if 0 < i < len(name) - 1:
  553. return name[i:]
  554. else:
  555. return ""
  556. @cached_property
  557. def suffix(self):
  558. return self._UNQUOTER(self.raw_suffix)
  559. @cached_property
  560. def raw_suffixes(self):
  561. name = self.raw_name
  562. if name.endswith("."):
  563. return ()
  564. name = name.lstrip(".")
  565. return tuple("." + suffix for suffix in name.split(".")[1:])
  566. @cached_property
  567. def suffixes(self):
  568. return tuple(self._UNQUOTER(suffix) for suffix in self.raw_suffixes)
  569. @staticmethod
  570. def _validate_authority_uri_abs_path(host, path):
  571. """Ensure that path in URL with authority starts with a leading slash.
  572. Raise ValueError if not.
  573. """
  574. if len(host) > 0 and len(path) > 0 and not path.startswith("/"):
  575. raise ValueError(
  576. "Path in a URL with authority should start with a slash ('/') if set"
  577. )
  578. def _make_child(self, segments, encoded=False):
  579. """add segments to self._val.path, accounting for absolute vs relative paths"""
  580. # keep the trailing slash if the last segment ends with /
  581. parsed = [""] if segments and segments[-1][-1:] == "/" else []
  582. for seg in reversed(segments):
  583. if not seg:
  584. continue
  585. if seg[0] == "/":
  586. raise ValueError(
  587. f"Appending path {seg!r} starting from slash is forbidden"
  588. )
  589. seg = seg if encoded else self._PATH_QUOTER(seg)
  590. if "/" in seg:
  591. parsed += (
  592. sub for sub in reversed(seg.split("/")) if sub and sub != "."
  593. )
  594. elif seg != ".":
  595. parsed.append(seg)
  596. parsed.reverse()
  597. old_path = self._val.path
  598. if old_path:
  599. parsed = [*old_path.rstrip("/").split("/"), *parsed]
  600. if self.is_absolute():
  601. parsed = _normalize_path_segments(parsed)
  602. if parsed and parsed[0] != "":
  603. # inject a leading slash when adding a path to an absolute URL
  604. # where there was none before
  605. parsed = ["", *parsed]
  606. new_path = "/".join(parsed)
  607. return URL(
  608. self._val._replace(path=new_path, query="", fragment=""), encoded=True
  609. )
  610. @classmethod
  611. def _normalize_path(cls, path):
  612. # Drop '.' and '..' from str path
  613. prefix = ""
  614. if path.startswith("/"):
  615. # preserve the "/" root element of absolute paths, copying it to the
  616. # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986.
  617. prefix = "/"
  618. path = path[1:]
  619. segments = path.split("/")
  620. return prefix + "/".join(_normalize_path_segments(segments))
  621. @classmethod
  622. def _encode_host(cls, host, human=False):
  623. try:
  624. ip, sep, zone = host.partition("%")
  625. ip = ip_address(ip)
  626. except ValueError:
  627. host = host.lower()
  628. # IDNA encoding is slow,
  629. # skip it for ASCII-only strings
  630. # Don't move the check into _idna_encode() helper
  631. # to reduce the cache size
  632. if human or host.isascii():
  633. return host
  634. host = _idna_encode(host)
  635. else:
  636. host = ip.compressed
  637. if sep:
  638. host += "%" + zone
  639. if ip.version == 6:
  640. host = "[" + host + "]"
  641. return host
  642. @classmethod
  643. def _make_netloc(
  644. cls, user, password, host, port, encode=False, encode_host=True, requote=False
  645. ):
  646. quoter = cls._REQUOTER if requote else cls._QUOTER
  647. if encode_host:
  648. ret = cls._encode_host(host)
  649. else:
  650. ret = host
  651. if port is not None:
  652. ret = ret + ":" + str(port)
  653. if password is not None:
  654. if not user:
  655. user = ""
  656. else:
  657. if encode:
  658. user = quoter(user)
  659. if encode:
  660. password = quoter(password)
  661. user = user + ":" + password
  662. elif user and encode:
  663. user = quoter(user)
  664. if user:
  665. ret = user + "@" + ret
  666. return ret
  667. def with_scheme(self, scheme):
  668. """Return a new URL with scheme replaced."""
  669. # N.B. doesn't cleanup query/fragment
  670. if not isinstance(scheme, str):
  671. raise TypeError("Invalid scheme type")
  672. if not self.is_absolute():
  673. raise ValueError("scheme replacement is not allowed for relative URLs")
  674. return URL(self._val._replace(scheme=scheme.lower()), encoded=True)
  675. def with_user(self, user):
  676. """Return a new URL with user replaced.
  677. Autoencode user if needed.
  678. Clear user/password if user is None.
  679. """
  680. # N.B. doesn't cleanup query/fragment
  681. val = self._val
  682. if user is None:
  683. password = None
  684. elif isinstance(user, str):
  685. user = self._QUOTER(user)
  686. password = val.password
  687. else:
  688. raise TypeError("Invalid user type")
  689. if not self.is_absolute():
  690. raise ValueError("user replacement is not allowed for relative URLs")
  691. return URL(
  692. self._val._replace(
  693. netloc=self._make_netloc(user, password, val.hostname, val.port)
  694. ),
  695. encoded=True,
  696. )
  697. def with_password(self, password):
  698. """Return a new URL with password replaced.
  699. Autoencode password if needed.
  700. Clear password if argument is None.
  701. """
  702. # N.B. doesn't cleanup query/fragment
  703. if password is None:
  704. pass
  705. elif isinstance(password, str):
  706. password = self._QUOTER(password)
  707. else:
  708. raise TypeError("Invalid password type")
  709. if not self.is_absolute():
  710. raise ValueError("password replacement is not allowed for relative URLs")
  711. val = self._val
  712. return URL(
  713. self._val._replace(
  714. netloc=self._make_netloc(val.username, password, val.hostname, val.port)
  715. ),
  716. encoded=True,
  717. )
  718. def with_host(self, host):
  719. """Return a new URL with host replaced.
  720. Autoencode host if needed.
  721. Changing host for relative URLs is not allowed, use .join()
  722. instead.
  723. """
  724. # N.B. doesn't cleanup query/fragment
  725. if not isinstance(host, str):
  726. raise TypeError("Invalid host type")
  727. if not self.is_absolute():
  728. raise ValueError("host replacement is not allowed for relative URLs")
  729. if not host:
  730. raise ValueError("host removing is not allowed")
  731. val = self._val
  732. return URL(
  733. self._val._replace(
  734. netloc=self._make_netloc(val.username, val.password, host, val.port)
  735. ),
  736. encoded=True,
  737. )
  738. def with_port(self, port):
  739. """Return a new URL with port replaced.
  740. Clear port to default if None is passed.
  741. """
  742. # N.B. doesn't cleanup query/fragment
  743. if port is not None:
  744. if isinstance(port, bool) or not isinstance(port, int):
  745. raise TypeError(f"port should be int or None, got {type(port)}")
  746. if port < 0 or port > 65535:
  747. raise ValueError(f"port must be between 0 and 65535, got {port}")
  748. if not self.is_absolute():
  749. raise ValueError("port replacement is not allowed for relative URLs")
  750. val = self._val
  751. return URL(
  752. self._val._replace(
  753. netloc=self._make_netloc(val.username, val.password, val.hostname, port)
  754. ),
  755. encoded=True,
  756. )
  757. def with_path(self, path, *, encoded=False):
  758. """Return a new URL with path replaced."""
  759. if not encoded:
  760. path = self._PATH_QUOTER(path)
  761. if self.is_absolute():
  762. path = self._normalize_path(path)
  763. if len(path) > 0 and path[0] != "/":
  764. path = "/" + path
  765. return URL(self._val._replace(path=path, query="", fragment=""), encoded=True)
  766. @classmethod
  767. def _query_seq_pairs(cls, quoter, pairs):
  768. for key, val in pairs:
  769. if isinstance(val, (list, tuple)):
  770. for v in val:
  771. yield quoter(key) + "=" + quoter(cls._query_var(v))
  772. else:
  773. yield quoter(key) + "=" + quoter(cls._query_var(val))
  774. @staticmethod
  775. def _query_var(v):
  776. cls = type(v)
  777. if issubclass(cls, str):
  778. return v
  779. if issubclass(cls, float):
  780. if math.isinf(v):
  781. raise ValueError("float('inf') is not supported")
  782. if math.isnan(v):
  783. raise ValueError("float('nan') is not supported")
  784. return str(float(v))
  785. if issubclass(cls, int) and cls is not bool:
  786. return str(int(v))
  787. raise TypeError(
  788. "Invalid variable type: value "
  789. "should be str, int or float, got {!r} "
  790. "of type {}".format(v, cls)
  791. )
  792. def _get_str_query(self, *args, **kwargs):
  793. if kwargs:
  794. if len(args) > 0:
  795. raise ValueError(
  796. "Either kwargs or single query parameter must be present"
  797. )
  798. query = kwargs
  799. elif len(args) == 1:
  800. query = args[0]
  801. else:
  802. raise ValueError("Either kwargs or single query parameter must be present")
  803. if query is None:
  804. query = None
  805. elif isinstance(query, Mapping):
  806. quoter = self._QUERY_PART_QUOTER
  807. query = "&".join(self._query_seq_pairs(quoter, query.items()))
  808. elif isinstance(query, str):
  809. query = self._QUERY_QUOTER(query)
  810. elif isinstance(query, (bytes, bytearray, memoryview)):
  811. raise TypeError(
  812. "Invalid query type: bytes, bytearray and memoryview are forbidden"
  813. )
  814. elif isinstance(query, Sequence):
  815. quoter = self._QUERY_PART_QUOTER
  816. # We don't expect sequence values if we're given a list of pairs
  817. # already; only mappings like builtin `dict` which can't have the
  818. # same key pointing to multiple values are allowed to use
  819. # `_query_seq_pairs`.
  820. query = "&".join(
  821. quoter(k) + "=" + quoter(self._query_var(v)) for k, v in query
  822. )
  823. else:
  824. raise TypeError(
  825. "Invalid query type: only str, mapping or "
  826. "sequence of (key, value) pairs is allowed"
  827. )
  828. return query
  829. def with_query(self, *args, **kwargs):
  830. """Return a new URL with query part replaced.
  831. Accepts any Mapping (e.g. dict, multidict.MultiDict instances)
  832. or str, autoencode the argument if needed.
  833. A sequence of (key, value) pairs is supported as well.
  834. It also can take an arbitrary number of keyword arguments.
  835. Clear query if None is passed.
  836. """
  837. # N.B. doesn't cleanup query/fragment
  838. new_query = self._get_str_query(*args, **kwargs) or ""
  839. return URL(
  840. self._val._replace(path=self._val.path, query=new_query), encoded=True
  841. )
  842. def update_query(self, *args, **kwargs):
  843. """Return a new URL with query part updated."""
  844. s = self._get_str_query(*args, **kwargs)
  845. query = None
  846. if s is not None:
  847. new_query = MultiDict(parse_qsl(s, keep_blank_values=True))
  848. query = MultiDict(self.query)
  849. query.update(new_query)
  850. return URL(
  851. self._val._replace(query=self._get_str_query(query) or ""), encoded=True
  852. )
  853. def with_fragment(self, fragment):
  854. """Return a new URL with fragment replaced.
  855. Autoencode fragment if needed.
  856. Clear fragment to default if None is passed.
  857. """
  858. # N.B. doesn't cleanup query/fragment
  859. if fragment is None:
  860. raw_fragment = ""
  861. elif not isinstance(fragment, str):
  862. raise TypeError("Invalid fragment type")
  863. else:
  864. raw_fragment = self._FRAGMENT_QUOTER(fragment)
  865. if self.raw_fragment == raw_fragment:
  866. return self
  867. return URL(self._val._replace(fragment=raw_fragment), encoded=True)
  868. def with_name(self, name):
  869. """Return a new URL with name (last part of path) replaced.
  870. Query and fragment parts are cleaned up.
  871. Name is encoded if needed.
  872. """
  873. # N.B. DOES cleanup query/fragment
  874. if not isinstance(name, str):
  875. raise TypeError("Invalid name type")
  876. if "/" in name:
  877. raise ValueError("Slash in name is not allowed")
  878. name = self._PATH_QUOTER(name)
  879. if name in (".", ".."):
  880. raise ValueError(". and .. values are forbidden")
  881. parts = list(self.raw_parts)
  882. if self.is_absolute():
  883. if len(parts) == 1:
  884. parts.append(name)
  885. else:
  886. parts[-1] = name
  887. parts[0] = "" # replace leading '/'
  888. else:
  889. parts[-1] = name
  890. if parts[0] == "/":
  891. parts[0] = "" # replace leading '/'
  892. return URL(
  893. self._val._replace(path="/".join(parts), query="", fragment=""),
  894. encoded=True,
  895. )
  896. def with_suffix(self, suffix):
  897. """Return a new URL with suffix (file extension of name) replaced.
  898. Query and fragment parts are cleaned up.
  899. suffix is encoded if needed.
  900. """
  901. if not isinstance(suffix, str):
  902. raise TypeError("Invalid suffix type")
  903. if suffix and not suffix.startswith(".") or suffix == ".":
  904. raise ValueError(f"Invalid suffix {suffix!r}")
  905. name = self.raw_name
  906. if not name:
  907. raise ValueError(f"{self!r} has an empty name")
  908. old_suffix = self.raw_suffix
  909. if not old_suffix:
  910. name = name + suffix
  911. else:
  912. name = name[: -len(old_suffix)] + suffix
  913. return self.with_name(name)
  914. def join(self, url):
  915. """Join URLs
  916. Construct a full (“absolute”) URL by combining a “base URL”
  917. (self) with another URL (url).
  918. Informally, this uses components of the base URL, in
  919. particular the addressing scheme, the network location and
  920. (part of) the path, to provide missing components in the
  921. relative URL.
  922. """
  923. # See docs for urllib.parse.urljoin
  924. if not isinstance(url, URL):
  925. raise TypeError("url should be URL")
  926. return URL(urljoin(str(self), str(url)), encoded=True)
  927. def joinpath(self, *other, encoded=False):
  928. """Return a new URL with the elements in other appended to the path."""
  929. return self._make_child(other, encoded=encoded)
  930. def human_repr(self):
  931. """Return decoded human readable string for URL representation."""
  932. user = _human_quote(self.user, "#/:?@[]")
  933. password = _human_quote(self.password, "#/:?@[]")
  934. host = self.host
  935. if host:
  936. host = self._encode_host(self.host, human=True)
  937. path = _human_quote(self.path, "#?")
  938. query_string = "&".join(
  939. "{}={}".format(_human_quote(k, "#&+;="), _human_quote(v, "#&+;="))
  940. for k, v in self.query.items()
  941. )
  942. fragment = _human_quote(self.fragment, "")
  943. return urlunsplit(
  944. SplitResult(
  945. self.scheme,
  946. self._make_netloc(
  947. user,
  948. password,
  949. host,
  950. self._val.port,
  951. encode_host=False,
  952. ),
  953. path,
  954. query_string,
  955. fragment,
  956. )
  957. )
  958. def _human_quote(s, unsafe):
  959. if not s:
  960. return s
  961. for c in "%" + unsafe:
  962. if c in s:
  963. s = s.replace(c, f"%{ord(c):02X}")
  964. if s.isprintable():
  965. return s
  966. return "".join(c if c.isprintable() else quote(c) for c in s)
  967. _MAXCACHE = 256
  968. @functools.lru_cache(_MAXCACHE)
  969. def _idna_decode(raw):
  970. try:
  971. return idna.decode(raw.encode("ascii"))
  972. except UnicodeError: # e.g. '::1'
  973. return raw.encode("ascii").decode("idna")
  974. @functools.lru_cache(_MAXCACHE)
  975. def _idna_encode(host):
  976. try:
  977. return idna.encode(host, uts46=True).decode("ascii")
  978. except UnicodeError:
  979. return host.encode("idna").decode("ascii")
  980. @rewrite_module
  981. def cache_clear():
  982. _idna_decode.cache_clear()
  983. _idna_encode.cache_clear()
  984. @rewrite_module
  985. def cache_info():
  986. return {
  987. "idna_encode": _idna_encode.cache_info(),
  988. "idna_decode": _idna_decode.cache_info(),
  989. }
  990. @rewrite_module
  991. def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
  992. global _idna_decode, _idna_encode
  993. _idna_encode = functools.lru_cache(idna_encode_size)(_idna_encode.__wrapped__)
  994. _idna_decode = functools.lru_cache(idna_decode_size)(_idna_decode.__wrapped__)