_http.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. """
  2. _http.py
  3. websocket - WebSocket client library for Python
  4. Copyright 2024 engn33r
  5. Licensed under the Apache License, Version 2.0 (the "License");
  6. you may not use this file except in compliance with the License.
  7. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. """
  15. import errno
  16. import os
  17. import socket
  18. from base64 import encodebytes as base64encode
  19. from ._exceptions import (
  20. WebSocketAddressException,
  21. WebSocketException,
  22. WebSocketProxyException,
  23. )
  24. from ._logging import debug, dump, trace
  25. from ._socket import DEFAULT_SOCKET_OPTION, recv_line, send
  26. from ._ssl_compat import HAVE_SSL, ssl
  27. from ._url import get_proxy_info, parse_url
  28. __all__ = ["proxy_info", "connect", "read_headers"]
  29. try:
  30. from python_socks._errors import *
  31. from python_socks._types import ProxyType
  32. from python_socks.sync import Proxy
  33. HAVE_PYTHON_SOCKS = True
  34. except:
  35. HAVE_PYTHON_SOCKS = False
  36. class ProxyError(Exception):
  37. pass
  38. class ProxyTimeoutError(Exception):
  39. pass
  40. class ProxyConnectionError(Exception):
  41. pass
  42. class proxy_info:
  43. def __init__(self, **options):
  44. self.proxy_host = options.get("http_proxy_host", None)
  45. if self.proxy_host:
  46. self.proxy_port = options.get("http_proxy_port", 0)
  47. self.auth = options.get("http_proxy_auth", None)
  48. self.no_proxy = options.get("http_no_proxy", None)
  49. self.proxy_protocol = options.get("proxy_type", "http")
  50. # Note: If timeout not specified, default python-socks timeout is 60 seconds
  51. self.proxy_timeout = options.get("http_proxy_timeout", None)
  52. if self.proxy_protocol not in [
  53. "http",
  54. "socks4",
  55. "socks4a",
  56. "socks5",
  57. "socks5h",
  58. ]:
  59. raise ProxyError(
  60. "Only http, socks4, socks5 proxy protocols are supported"
  61. )
  62. else:
  63. self.proxy_port = 0
  64. self.auth = None
  65. self.no_proxy = None
  66. self.proxy_protocol = "http"
  67. def _start_proxied_socket(url: str, options, proxy) -> tuple:
  68. if not HAVE_PYTHON_SOCKS:
  69. raise WebSocketException(
  70. "Python Socks is needed for SOCKS proxying but is not available"
  71. )
  72. hostname, port, resource, is_secure = parse_url(url)
  73. if proxy.proxy_protocol == "socks4":
  74. rdns = False
  75. proxy_type = ProxyType.SOCKS4
  76. # socks4a sends DNS through proxy
  77. elif proxy.proxy_protocol == "socks4a":
  78. rdns = True
  79. proxy_type = ProxyType.SOCKS4
  80. elif proxy.proxy_protocol == "socks5":
  81. rdns = False
  82. proxy_type = ProxyType.SOCKS5
  83. # socks5h sends DNS through proxy
  84. elif proxy.proxy_protocol == "socks5h":
  85. rdns = True
  86. proxy_type = ProxyType.SOCKS5
  87. ws_proxy = Proxy.create(
  88. proxy_type=proxy_type,
  89. host=proxy.proxy_host,
  90. port=int(proxy.proxy_port),
  91. username=proxy.auth[0] if proxy.auth else None,
  92. password=proxy.auth[1] if proxy.auth else None,
  93. rdns=rdns,
  94. )
  95. sock = ws_proxy.connect(hostname, port, timeout=proxy.proxy_timeout)
  96. if is_secure:
  97. if HAVE_SSL:
  98. sock = _ssl_socket(sock, options.sslopt, hostname)
  99. else:
  100. raise WebSocketException("SSL not available.")
  101. return sock, (hostname, port, resource)
  102. def connect(url: str, options, proxy, socket):
  103. # Use _start_proxied_socket() only for socks4 or socks5 proxy
  104. # Use _tunnel() for http proxy
  105. # TODO: Use python-socks for http protocol also, to standardize flow
  106. if proxy.proxy_host and not socket and proxy.proxy_protocol != "http":
  107. return _start_proxied_socket(url, options, proxy)
  108. hostname, port_from_url, resource, is_secure = parse_url(url)
  109. if socket:
  110. return socket, (hostname, port_from_url, resource)
  111. addrinfo_list, need_tunnel, auth = _get_addrinfo_list(
  112. hostname, port_from_url, is_secure, proxy
  113. )
  114. if not addrinfo_list:
  115. raise WebSocketException(f"Host not found.: {hostname}:{port_from_url}")
  116. sock = None
  117. try:
  118. sock = _open_socket(addrinfo_list, options.sockopt, options.timeout)
  119. if need_tunnel:
  120. sock = _tunnel(sock, hostname, port_from_url, auth)
  121. if is_secure:
  122. if HAVE_SSL:
  123. sock = _ssl_socket(sock, options.sslopt, hostname)
  124. else:
  125. raise WebSocketException("SSL not available.")
  126. return sock, (hostname, port_from_url, resource)
  127. except:
  128. if sock:
  129. sock.close()
  130. raise
  131. def _get_addrinfo_list(hostname, port: int, is_secure: bool, proxy) -> tuple:
  132. phost, pport, pauth = get_proxy_info(
  133. hostname,
  134. is_secure,
  135. proxy.proxy_host,
  136. proxy.proxy_port,
  137. proxy.auth,
  138. proxy.no_proxy,
  139. )
  140. try:
  141. # when running on windows 10, getaddrinfo without socktype returns a socktype 0.
  142. # This generates an error exception: `_on_error: exception Socket type must be stream or datagram, not 0`
  143. # or `OSError: [Errno 22] Invalid argument` when creating socket. Force the socket type to SOCK_STREAM.
  144. if not phost:
  145. addrinfo_list = socket.getaddrinfo(
  146. hostname, port, 0, socket.SOCK_STREAM, socket.SOL_TCP
  147. )
  148. return addrinfo_list, False, None
  149. else:
  150. pport = pport and pport or 80
  151. # when running on windows 10, the getaddrinfo used above
  152. # returns a socktype 0. This generates an error exception:
  153. # _on_error: exception Socket type must be stream or datagram, not 0
  154. # Force the socket type to SOCK_STREAM
  155. addrinfo_list = socket.getaddrinfo(
  156. phost, pport, 0, socket.SOCK_STREAM, socket.SOL_TCP
  157. )
  158. return addrinfo_list, True, pauth
  159. except socket.gaierror as e:
  160. raise WebSocketAddressException(e)
  161. def _open_socket(addrinfo_list, sockopt, timeout):
  162. err = None
  163. for addrinfo in addrinfo_list:
  164. family, socktype, proto = addrinfo[:3]
  165. sock = socket.socket(family, socktype, proto)
  166. sock.settimeout(timeout)
  167. for opts in DEFAULT_SOCKET_OPTION:
  168. sock.setsockopt(*opts)
  169. for opts in sockopt:
  170. sock.setsockopt(*opts)
  171. address = addrinfo[4]
  172. err = None
  173. while not err:
  174. try:
  175. sock.connect(address)
  176. except socket.error as error:
  177. sock.close()
  178. error.remote_ip = str(address[0])
  179. try:
  180. eConnRefused = (
  181. errno.ECONNREFUSED,
  182. errno.WSAECONNREFUSED,
  183. errno.ENETUNREACH,
  184. )
  185. except AttributeError:
  186. eConnRefused = (errno.ECONNREFUSED, errno.ENETUNREACH)
  187. if error.errno not in eConnRefused:
  188. raise error
  189. err = error
  190. continue
  191. else:
  192. break
  193. else:
  194. continue
  195. break
  196. else:
  197. if err:
  198. raise err
  199. return sock
def _wrap_sni_socket(sock: socket.socket, sslopt: dict, hostname, check_hostname):
    """Wrap *sock* with TLS, passing *hostname* as ``server_hostname``.

    Args:
        sock: socket to wrap.
        sslopt: SSL options. Keys read here: ``context``, ``ssl_version``,
            ``cert_reqs``, ``ca_certs``, ``ca_cert_path``, ``certfile``,
            ``keyfile``, ``password``, ``check_hostname``, ``ciphers``,
            ``cert_chain``, ``ecdh_curve``, ``do_handshake_on_connect`` and
            ``suppress_ragged_eofs``.
        hostname: value passed to ``wrap_socket`` as ``server_hostname``.
        check_hostname: not referenced in this function body; the hostname
            check setting is read from ``sslopt`` instead.

    Returns:
        The result of ``context.wrap_socket(...)``.
    """
    # NOTE(review): the configuration steps below are assumed to apply only
    # to a context created here, leaving a caller-supplied ``context``
    # untouched — confirm the nesting against the upstream file.
    context = sslopt.get("context", None)
    if not context:
        context = ssl.SSLContext(sslopt.get("ssl_version", ssl.PROTOCOL_TLS_CLIENT))
        # Non default context need to manually enable SSLKEYLOGFILE support by setting the keylog_filename attribute.
        # For more details see also:
        # * https://docs.python.org/3.8/library/ssl.html?highlight=sslkeylogfile#context-creation
        # * https://docs.python.org/3.8/library/ssl.html?highlight=sslkeylogfile#ssl.SSLContext.keylog_filename
        context.keylog_filename = os.environ.get("SSLKEYLOGFILE", None)

        # Load CA certificates only when verification is requested:
        # explicit file/directory if given, otherwise the default store.
        if sslopt.get("cert_reqs", ssl.CERT_NONE) != ssl.CERT_NONE:
            cafile = sslopt.get("ca_certs", None)
            capath = sslopt.get("ca_cert_path", None)
            if cafile or capath:
                context.load_verify_locations(cafile=cafile, capath=capath)
            elif hasattr(context, "load_default_certs"):
                context.load_default_certs(ssl.Purpose.SERVER_AUTH)
        # Optional client certificate given as separate certfile/keyfile/password keys.
        if sslopt.get("certfile", None):
            context.load_cert_chain(
                sslopt["certfile"],
                sslopt.get("keyfile", None),
                sslopt.get("password", None),
            )

        # Python 3.10 switch to PROTOCOL_TLS_CLIENT defaults to "cert_reqs = ssl.CERT_REQUIRED" and "check_hostname = True"
        # If both disabled, set check_hostname before verify_mode
        # see https://github.com/liris/websocket-client/commit/b96a2e8fa765753e82eea531adb19716b52ca3ca#commitcomment-10803153
        if sslopt.get("cert_reqs", ssl.CERT_NONE) == ssl.CERT_NONE and not sslopt.get(
            "check_hostname", False
        ):
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        else:
            context.check_hostname = sslopt.get("check_hostname", True)
            context.verify_mode = sslopt.get("cert_reqs", ssl.CERT_REQUIRED)

        if "ciphers" in sslopt:
            context.set_ciphers(sslopt["ciphers"])
        # Optional client certificate given as a (certfile, keyfile, password) triple.
        if "cert_chain" in sslopt:
            certfile, keyfile, password = sslopt["cert_chain"]
            context.load_cert_chain(certfile, keyfile, password)
        if "ecdh_curve" in sslopt:
            context.set_ecdh_curve(sslopt["ecdh_curve"])

    return context.wrap_socket(
        sock,
        do_handshake_on_connect=sslopt.get("do_handshake_on_connect", True),
        suppress_ragged_eofs=sslopt.get("suppress_ragged_eofs", True),
        server_hostname=hostname,
    )
  246. def _ssl_socket(sock: socket.socket, user_sslopt: dict, hostname):
  247. sslopt: dict = {"cert_reqs": ssl.CERT_REQUIRED}
  248. sslopt.update(user_sslopt)
  249. cert_path = os.environ.get("WEBSOCKET_CLIENT_CA_BUNDLE")
  250. if (
  251. cert_path
  252. and os.path.isfile(cert_path)
  253. and user_sslopt.get("ca_certs", None) is None
  254. ):
  255. sslopt["ca_certs"] = cert_path
  256. elif (
  257. cert_path
  258. and os.path.isdir(cert_path)
  259. and user_sslopt.get("ca_cert_path", None) is None
  260. ):
  261. sslopt["ca_cert_path"] = cert_path
  262. if sslopt.get("server_hostname", None):
  263. hostname = sslopt["server_hostname"]
  264. check_hostname = sslopt.get("check_hostname", True)
  265. sock = _wrap_sni_socket(sock, sslopt, hostname, check_hostname)
  266. return sock
  267. def _tunnel(sock: socket.socket, host, port: int, auth) -> socket.socket:
  268. debug("Connecting proxy...")
  269. connect_header = f"CONNECT {host}:{port} HTTP/1.1\r\n"
  270. connect_header += f"Host: {host}:{port}\r\n"
  271. # TODO: support digest auth.
  272. if auth and auth[0]:
  273. auth_str = auth[0]
  274. if auth[1]:
  275. auth_str += f":{auth[1]}"
  276. encoded_str = base64encode(auth_str.encode()).strip().decode().replace("\n", "")
  277. connect_header += f"Proxy-Authorization: Basic {encoded_str}\r\n"
  278. connect_header += "\r\n"
  279. dump("request header", connect_header)
  280. send(sock, connect_header)
  281. try:
  282. status, _, _ = read_headers(sock)
  283. except Exception as e:
  284. raise WebSocketProxyException(str(e))
  285. if status != 200:
  286. raise WebSocketProxyException(f"failed CONNECT via proxy status: {status}")
  287. return sock
  288. def read_headers(sock: socket.socket) -> tuple:
  289. status = None
  290. status_message = None
  291. headers: dict = {}
  292. trace("--- response header ---")
  293. while True:
  294. line = recv_line(sock)
  295. line = line.decode("utf-8").strip()
  296. if not line:
  297. break
  298. trace(line)
  299. if not status:
  300. status_info = line.split(" ", 2)
  301. status = int(status_info[1])
  302. if len(status_info) > 2:
  303. status_message = status_info[2]
  304. else:
  305. kv = line.split(":", 1)
  306. if len(kv) != 2:
  307. raise WebSocketException("Invalid header")
  308. key, value = kv
  309. if key.lower() == "set-cookie" and headers.get("set-cookie"):
  310. headers["set-cookie"] = headers.get("set-cookie") + "; " + value.strip()
  311. else:
  312. headers[key.lower()] = value.strip()
  313. trace("-----------------------")
  314. return status, headers, status_message