_websockets.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. from __future__ import annotations
  2. import contextlib
  3. import functools
  4. import io
  5. import logging
  6. import ssl
  7. import sys
  8. from ._helper import (
  9. create_connection,
  10. create_socks_proxy_socket,
  11. make_socks_proxy_opts,
  12. select_proxy,
  13. )
  14. from .common import Features, Response, register_rh
  15. from .exceptions import (
  16. CertificateVerifyError,
  17. HTTPError,
  18. ProxyError,
  19. RequestError,
  20. SSLError,
  21. TransportError,
  22. )
  23. from .websocket import WebSocketRequestHandler, WebSocketResponse
  24. from ..dependencies import websockets
  25. from ..socks import ProxyError as SocksProxyError
  26. from ..utils import int_or_none
  27. if not websockets:
  28. raise ImportError('websockets is not installed')
  29. import websockets.version
  30. websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
  31. if websockets_version < (12, 0):
  32. raise ImportError('Only websockets>=12.0 is supported')
  33. import websockets.sync.client
  34. from websockets.uri import parse_uri
  35. # In websockets Connection, recv_exc and recv_events_exc are defined
  36. # after the recv events handler thread is started [1].
  37. # On our CI using PyPy, in some cases a race condition may occur
  38. # where the recv events handler thread tries to use these attributes before they are defined [2].
  39. # 1: https://github.com/python-websockets/websockets/blame/de768cf65e7e2b1a3b67854fb9e08816a5ff7050/src/websockets/sync/connection.py#L93
  40. # 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
  41. import websockets.sync.connection # isort: split
  42. with contextlib.suppress(Exception):
  43. # > 12.0
  44. websockets.sync.connection.Connection.recv_exc = None
  45. # 12.0
  46. websockets.sync.connection.Connection.recv_events_exc = None
  47. class WebsocketsResponseAdapter(WebSocketResponse):
  48. def __init__(self, ws: websockets.sync.client.ClientConnection, url):
  49. super().__init__(
  50. fp=io.BytesIO(ws.response.body or b''),
  51. url=url,
  52. headers=ws.response.headers,
  53. status=ws.response.status_code,
  54. reason=ws.response.reason_phrase,
  55. )
  56. self._ws = ws
  57. def close(self):
  58. self._ws.close()
  59. super().close()
  60. def send(self, message):
  61. # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
  62. try:
  63. return self._ws.send(message)
  64. except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
  65. raise TransportError(cause=e) from e
  66. except SocksProxyError as e:
  67. raise ProxyError(cause=e) from e
  68. except TypeError as e:
  69. raise RequestError(cause=e) from e
  70. def recv(self):
  71. # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
  72. try:
  73. return self._ws.recv()
  74. except SocksProxyError as e:
  75. raise ProxyError(cause=e) from e
  76. except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
  77. raise TransportError(cause=e) from e
  78. @register_rh
  79. class WebsocketsRH(WebSocketRequestHandler):
  80. """
  81. Websockets request handler
  82. https://websockets.readthedocs.io
  83. https://github.com/python-websockets/websockets
  84. """
  85. _SUPPORTED_URL_SCHEMES = ('wss', 'ws')
  86. _SUPPORTED_PROXY_SCHEMES = ('socks4', 'socks4a', 'socks5', 'socks5h')
  87. _SUPPORTED_FEATURES = (Features.ALL_PROXY, Features.NO_PROXY)
  88. RH_NAME = 'websockets'
  89. def __init__(self, *args, **kwargs):
  90. super().__init__(*args, **kwargs)
  91. self.__logging_handlers = {}
  92. for name in ('websockets.client', 'websockets.server'):
  93. logger = logging.getLogger(name)
  94. handler = logging.StreamHandler(stream=sys.stdout)
  95. handler.setFormatter(logging.Formatter(f'{self.RH_NAME}: %(message)s'))
  96. self.__logging_handlers[name] = handler
  97. logger.addHandler(handler)
  98. if self.verbose:
  99. logger.setLevel(logging.DEBUG)
  100. def _check_extensions(self, extensions):
  101. super()._check_extensions(extensions)
  102. extensions.pop('timeout', None)
  103. extensions.pop('cookiejar', None)
  104. def close(self):
  105. # Remove the logging handler that contains a reference to our logger
  106. # See: https://github.com/yt-dlp/yt-dlp/issues/8922
  107. for name, handler in self.__logging_handlers.items():
  108. logging.getLogger(name).removeHandler(handler)
  109. def _send(self, request):
  110. timeout = self._calculate_timeout(request)
  111. headers = self._merge_headers(request.headers)
  112. if 'cookie' not in headers:
  113. cookiejar = self._get_cookiejar(request)
  114. cookie_header = cookiejar.get_cookie_header(request.url)
  115. if cookie_header:
  116. headers['cookie'] = cookie_header
  117. wsuri = parse_uri(request.url)
  118. create_conn_kwargs = {
  119. 'source_address': (self.source_address, 0) if self.source_address else None,
  120. 'timeout': timeout,
  121. }
  122. proxy = select_proxy(request.url, self._get_proxies(request))
  123. try:
  124. if proxy:
  125. socks_proxy_options = make_socks_proxy_opts(proxy)
  126. sock = create_connection(
  127. address=(socks_proxy_options['addr'], socks_proxy_options['port']),
  128. _create_socket_func=functools.partial(
  129. create_socks_proxy_socket, (wsuri.host, wsuri.port), socks_proxy_options),
  130. **create_conn_kwargs,
  131. )
  132. else:
  133. sock = create_connection(
  134. address=(wsuri.host, wsuri.port),
  135. **create_conn_kwargs,
  136. )
  137. conn = websockets.sync.client.connect(
  138. sock=sock,
  139. uri=request.url,
  140. additional_headers=headers,
  141. open_timeout=timeout,
  142. user_agent_header=None,
  143. ssl_context=self._make_sslcontext() if wsuri.secure else None,
  144. close_timeout=0, # not ideal, but prevents yt-dlp hanging
  145. )
  146. return WebsocketsResponseAdapter(conn, url=request.url)
  147. # Exceptions as per https://websockets.readthedocs.io/en/stable/reference/sync/client.html
  148. except SocksProxyError as e:
  149. raise ProxyError(cause=e) from e
  150. except websockets.exceptions.InvalidURI as e:
  151. raise RequestError(cause=e) from e
  152. except ssl.SSLCertVerificationError as e:
  153. raise CertificateVerifyError(cause=e) from e
  154. except ssl.SSLError as e:
  155. raise SSLError(cause=e) from e
  156. except websockets.exceptions.InvalidStatus as e:
  157. raise HTTPError(
  158. Response(
  159. fp=io.BytesIO(e.response.body),
  160. url=request.url,
  161. headers=e.response.headers,
  162. status=e.response.status_code,
  163. reason=e.response.reason_phrase),
  164. ) from e
  165. except (OSError, TimeoutError, websockets.exceptions.WebSocketException) as e:
  166. raise TransportError(cause=e) from e