client.py 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550
r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
  64. import email.parser
  65. import email.message
  66. import errno
  67. import http
  68. import io
  69. import re
  70. import socket
  71. import sys
  72. import collections.abc
  73. from urllib.parse import urlsplit
# Public API of this module.
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
           "responses"]

# Default TCP ports for plain and TLS connections.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes until begin() has parsed
# the status line and headers.
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# hack to maintain backwards compatibility: every http.HTTPStatus member
# (e.g. CONTINUE, NO_CONTENT, NOT_MODIFIED) becomes a module-level name.
globals().update(http.HTTPStatus.__members__)

# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}

# maximal line length when calling readline().
_MAXLINE = 65536
# maximal number of header lines accepted by _read_headers().
_MAXHEADERS = 100

# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
#
# VCHAR          = %x21-7E
# obs-text       = %x80-FF
# header-field   = field-name ":" OWS field-value OWS
# field-name     = token
# field-value    = *( field-content / obs-fold )
# field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar    = VCHAR / obs-text
#
# obs-fold       = CRLF 1*( SP / HTAB )
#                ; obsolete line folding
#                ; see Section 3.2.4

# token          = 1*tchar
#
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
#
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1

# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
# Matches a bare LF or CR that does not start a folded continuation line.
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search

# These characters are not allowed within HTTP URL paths.
#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.

# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')

# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
  138. def _encode(data, name='data'):
  139. """Call data.encode("latin-1") but show a better error message."""
  140. try:
  141. return data.encode("latin-1")
  142. except UnicodeEncodeError as err:
  143. raise UnicodeEncodeError(
  144. err.encoding,
  145. err.object,
  146. err.start,
  147. err.end,
  148. "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
  149. "if you want to send it encoded in UTF-8." %
  150. (name.title(), data[err.start:err.end], name)) from None
  151. def _strip_ipv6_iface(enc_name: bytes) -> bytes:
  152. """Remove interface scope from IPv6 address."""
  153. enc_name, percent, _ = enc_name.partition(b"%")
  154. if percent:
  155. assert enc_name.startswith(b'['), enc_name
  156. enc_name += b']'
  157. return enc_name
  158. class HTTPMessage(email.message.Message):
  159. # XXX The only usage of this method is in
  160. # http.server.CGIHTTPRequestHandler. Maybe move the code there so
  161. # that it doesn't need to be part of the public API. The API has
  162. # never been defined so this could cause backwards compatibility
  163. # issues.
  164. def getallmatchingheaders(self, name):
  165. """Find all header lines matching a given header name.
  166. Look through the list of headers and find all lines matching a given
  167. header name (and their continuation lines). A list of the lines is
  168. returned, without interpretation. If the header does not occur, an
  169. empty list is returned. If the header occurs multiple times, all
  170. occurrences are returned. Case is not important in the header name.
  171. """
  172. name = name.lower() + ':'
  173. n = len(name)
  174. lst = []
  175. hit = 0
  176. for line in self.keys():
  177. if line[:n].lower() == name:
  178. hit = 1
  179. elif not line[:1].isspace():
  180. hit = 0
  181. if hit:
  182. lst.append(line)
  183. return lst
  184. def _read_headers(fp):
  185. """Reads potential header lines into a list from a file pointer.
  186. Length of line is limited by _MAXLINE, and number of
  187. headers is limited by _MAXHEADERS.
  188. """
  189. headers = []
  190. while True:
  191. line = fp.readline(_MAXLINE + 1)
  192. if len(line) > _MAXLINE:
  193. raise LineTooLong("header line")
  194. headers.append(line)
  195. if len(headers) > _MAXHEADERS:
  196. raise HTTPException("got more than %d headers" % _MAXHEADERS)
  197. if line in (b'\r\n', b'\n', b''):
  198. break
  199. return headers
  200. def _parse_header_lines(header_lines, _class=HTTPMessage):
  201. """
  202. Parses only RFC2822 headers from header lines.
  203. email Parser wants to see strings rather than bytes.
  204. But a TextIOWrapper around self.rfile would buffer too many bytes
  205. from the stream, bytes which we later need to read as bytes.
  206. So we read the correct bytes here, as bytes, for email Parser
  207. to parse.
  208. """
  209. hstring = b''.join(header_lines).decode('iso-8859-1')
  210. return email.parser.Parser(_class=_class).parsestr(hstring)
  211. def parse_headers(fp, _class=HTTPMessage):
  212. """Parses only RFC2822 headers from a file pointer."""
  213. headers = _read_headers(fp)
  214. return _parse_header_lines(headers, _class)
  215. class HTTPResponse(io.BufferedIOBase):
  216. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
  217. # The bytes from the socket object are iso-8859-1 strings.
  218. # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
  219. # text following RFC 2047. The basic status line parsing only
  220. # accepts iso-8859-1.
  221. def __init__(self, sock, debuglevel=0, method=None, url=None):
  222. # If the response includes a content-length header, we need to
  223. # make sure that the client doesn't read more than the
  224. # specified number of bytes. If it does, it will block until
  225. # the server times out and closes the connection. This will
  226. # happen if a self.fp.read() is done (without a size) whether
  227. # self.fp is buffered or not. So, no self.fp.read() by
  228. # clients unless they know what they are doing.
  229. self.fp = sock.makefile("rb")
  230. self.debuglevel = debuglevel
  231. self._method = method
  232. # The HTTPResponse object is returned via urllib. The clients
  233. # of http and urllib expect different attributes for the
  234. # headers. headers is used here and supports urllib. msg is
  235. # provided as a backwards compatibility layer for http
  236. # clients.
  237. self.headers = self.msg = None
  238. # from the Status-Line of the response
  239. self.version = _UNKNOWN # HTTP-Version
  240. self.status = _UNKNOWN # Status-Code
  241. self.reason = _UNKNOWN # Reason-Phrase
  242. self.chunked = _UNKNOWN # is "chunked" being used?
  243. self.chunk_left = _UNKNOWN # bytes left to read in current chunk
  244. self.length = _UNKNOWN # number of bytes left in response
  245. self.will_close = _UNKNOWN # conn will close at end of response
  246. def _read_status(self):
  247. line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  248. if len(line) > _MAXLINE:
  249. raise LineTooLong("status line")
  250. if self.debuglevel > 0:
  251. print("reply:", repr(line))
  252. if not line:
  253. # Presumably, the server closed the connection before
  254. # sending a valid response.
  255. raise RemoteDisconnected("Remote end closed connection without"
  256. " response")
  257. try:
  258. version, status, reason = line.split(None, 2)
  259. except ValueError:
  260. try:
  261. version, status = line.split(None, 1)
  262. reason = ""
  263. except ValueError:
  264. # empty version will cause next test to fail.
  265. version = ""
  266. if not version.startswith("HTTP/"):
  267. self._close_conn()
  268. raise BadStatusLine(line)
  269. # The status code is a three-digit number
  270. try:
  271. status = int(status)
  272. if status < 100 or status > 999:
  273. raise BadStatusLine(line)
  274. except ValueError:
  275. raise BadStatusLine(line)
  276. return version, status, reason
  277. def begin(self):
  278. if self.headers is not None:
  279. # we've already started reading the response
  280. return
  281. # read until we get a non-100 response
  282. while True:
  283. version, status, reason = self._read_status()
  284. if status != CONTINUE:
  285. break
  286. # skip the header from the 100 response
  287. skipped_headers = _read_headers(self.fp)
  288. if self.debuglevel > 0:
  289. print("headers:", skipped_headers)
  290. del skipped_headers
  291. self.code = self.status = status
  292. self.reason = reason.strip()
  293. if version in ("HTTP/1.0", "HTTP/0.9"):
  294. # Some servers might still return "0.9", treat it as 1.0 anyway
  295. self.version = 10
  296. elif version.startswith("HTTP/1."):
  297. self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
  298. else:
  299. raise UnknownProtocol(version)
  300. self.headers = self.msg = parse_headers(self.fp)
  301. if self.debuglevel > 0:
  302. for hdr, val in self.headers.items():
  303. print("header:", hdr + ":", val)
  304. # are we using the chunked-style of transfer encoding?
  305. tr_enc = self.headers.get("transfer-encoding")
  306. if tr_enc and tr_enc.lower() == "chunked":
  307. self.chunked = True
  308. self.chunk_left = None
  309. else:
  310. self.chunked = False
  311. # will the connection close at the end of the response?
  312. self.will_close = self._check_close()
  313. # do we have a Content-Length?
  314. # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
  315. self.length = None
  316. length = self.headers.get("content-length")
  317. if length and not self.chunked:
  318. try:
  319. self.length = int(length)
  320. except ValueError:
  321. self.length = None
  322. else:
  323. if self.length < 0: # ignore nonsensical negative lengths
  324. self.length = None
  325. else:
  326. self.length = None
  327. # does the body have a fixed length? (of zero)
  328. if (status == NO_CONTENT or status == NOT_MODIFIED or
  329. 100 <= status < 200 or # 1xx codes
  330. self._method == "HEAD"):
  331. self.length = 0
  332. # if the connection remains open, and we aren't using chunked, and
  333. # a content-length was not provided, then assume that the connection
  334. # WILL close.
  335. if (not self.will_close and
  336. not self.chunked and
  337. self.length is None):
  338. self.will_close = True
  339. def _check_close(self):
  340. conn = self.headers.get("connection")
  341. if self.version == 11:
  342. # An HTTP/1.1 proxy is assumed to stay open unless
  343. # explicitly closed.
  344. if conn and "close" in conn.lower():
  345. return True
  346. return False
  347. # Some HTTP/1.0 implementations have support for persistent
  348. # connections, using rules different than HTTP/1.1.
  349. # For older HTTP, Keep-Alive indicates persistent connection.
  350. if self.headers.get("keep-alive"):
  351. return False
  352. # At least Akamai returns a "Connection: Keep-Alive" header,
  353. # which was supposed to be sent by the client.
  354. if conn and "keep-alive" in conn.lower():
  355. return False
  356. # Proxy-Connection is a netscape hack.
  357. pconn = self.headers.get("proxy-connection")
  358. if pconn and "keep-alive" in pconn.lower():
  359. return False
  360. # otherwise, assume it will close
  361. return True
  362. def _close_conn(self):
  363. fp = self.fp
  364. self.fp = None
  365. fp.close()
  366. def close(self):
  367. try:
  368. super().close() # set "closed" flag
  369. finally:
  370. if self.fp:
  371. self._close_conn()
  372. # These implementations are for the benefit of io.BufferedReader.
  373. # XXX This class should probably be revised to act more like
  374. # the "raw stream" that BufferedReader expects.
  375. def flush(self):
  376. super().flush()
  377. if self.fp:
  378. self.fp.flush()
  379. def readable(self):
  380. """Always returns True"""
  381. return True
  382. # End of "raw stream" methods
  383. def isclosed(self):
  384. """True if the connection is closed."""
  385. # NOTE: it is possible that we will not ever call self.close(). This
  386. # case occurs when will_close is TRUE, length is None, and we
  387. # read up to the last byte, but NOT past it.
  388. #
  389. # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
  390. # called, meaning self.isclosed() is meaningful.
  391. return self.fp is None
  392. def read(self, amt=None):
  393. """Read and return the response body, or up to the next amt bytes."""
  394. if self.fp is None:
  395. return b""
  396. if self._method == "HEAD":
  397. self._close_conn()
  398. return b""
  399. if self.chunked:
  400. return self._read_chunked(amt)
  401. if amt is not None and amt >= 0:
  402. if self.length is not None and amt > self.length:
  403. # clip the read to the "end of response"
  404. amt = self.length
  405. s = self.fp.read(amt)
  406. if not s and amt:
  407. # Ideally, we would raise IncompleteRead if the content-length
  408. # wasn't satisfied, but it might break compatibility.
  409. self._close_conn()
  410. elif self.length is not None:
  411. self.length -= len(s)
  412. if not self.length:
  413. self._close_conn()
  414. return s
  415. else:
  416. # Amount is not given (unbounded read) so we must check self.length
  417. if self.length is None:
  418. s = self.fp.read()
  419. else:
  420. try:
  421. s = self._safe_read(self.length)
  422. except IncompleteRead:
  423. self._close_conn()
  424. raise
  425. self.length = 0
  426. self._close_conn() # we read everything
  427. return s
  428. def readinto(self, b):
  429. """Read up to len(b) bytes into bytearray b and return the number
  430. of bytes read.
  431. """
  432. if self.fp is None:
  433. return 0
  434. if self._method == "HEAD":
  435. self._close_conn()
  436. return 0
  437. if self.chunked:
  438. return self._readinto_chunked(b)
  439. if self.length is not None:
  440. if len(b) > self.length:
  441. # clip the read to the "end of response"
  442. b = memoryview(b)[0:self.length]
  443. # we do not use _safe_read() here because this may be a .will_close
  444. # connection, and the user is reading more bytes than will be provided
  445. # (for example, reading in 1k chunks)
  446. n = self.fp.readinto(b)
  447. if not n and b:
  448. # Ideally, we would raise IncompleteRead if the content-length
  449. # wasn't satisfied, but it might break compatibility.
  450. self._close_conn()
  451. elif self.length is not None:
  452. self.length -= n
  453. if not self.length:
  454. self._close_conn()
  455. return n
  456. def _read_next_chunk_size(self):
  457. # Read the next chunk size from the file
  458. line = self.fp.readline(_MAXLINE + 1)
  459. if len(line) > _MAXLINE:
  460. raise LineTooLong("chunk size")
  461. i = line.find(b";")
  462. if i >= 0:
  463. line = line[:i] # strip chunk-extensions
  464. try:
  465. return int(line, 16)
  466. except ValueError:
  467. # close the connection as protocol synchronisation is
  468. # probably lost
  469. self._close_conn()
  470. raise
  471. def _read_and_discard_trailer(self):
  472. # read and discard trailer up to the CRLF terminator
  473. ### note: we shouldn't have any trailers!
  474. while True:
  475. line = self.fp.readline(_MAXLINE + 1)
  476. if len(line) > _MAXLINE:
  477. raise LineTooLong("trailer line")
  478. if not line:
  479. # a vanishingly small number of sites EOF without
  480. # sending the trailer
  481. break
  482. if line in (b'\r\n', b'\n', b''):
  483. break
  484. def _get_chunk_left(self):
  485. # return self.chunk_left, reading a new chunk if necessary.
  486. # chunk_left == 0: at the end of the current chunk, need to close it
  487. # chunk_left == None: No current chunk, should read next.
  488. # This function returns non-zero or None if the last chunk has
  489. # been read.
  490. chunk_left = self.chunk_left
  491. if not chunk_left: # Can be 0 or None
  492. if chunk_left is not None:
  493. # We are at the end of chunk, discard chunk end
  494. self._safe_read(2) # toss the CRLF at the end of the chunk
  495. try:
  496. chunk_left = self._read_next_chunk_size()
  497. except ValueError:
  498. raise IncompleteRead(b'')
  499. if chunk_left == 0:
  500. # last chunk: 1*("0") [ chunk-extension ] CRLF
  501. self._read_and_discard_trailer()
  502. # we read everything; close the "file"
  503. self._close_conn()
  504. chunk_left = None
  505. self.chunk_left = chunk_left
  506. return chunk_left
  507. def _read_chunked(self, amt=None):
  508. assert self.chunked != _UNKNOWN
  509. if amt is not None and amt < 0:
  510. amt = None
  511. value = []
  512. try:
  513. while (chunk_left := self._get_chunk_left()) is not None:
  514. if amt is not None and amt <= chunk_left:
  515. value.append(self._safe_read(amt))
  516. self.chunk_left = chunk_left - amt
  517. break
  518. value.append(self._safe_read(chunk_left))
  519. if amt is not None:
  520. amt -= chunk_left
  521. self.chunk_left = 0
  522. return b''.join(value)
  523. except IncompleteRead as exc:
  524. raise IncompleteRead(b''.join(value)) from exc
  525. def _readinto_chunked(self, b):
  526. assert self.chunked != _UNKNOWN
  527. total_bytes = 0
  528. mvb = memoryview(b)
  529. try:
  530. while True:
  531. chunk_left = self._get_chunk_left()
  532. if chunk_left is None:
  533. return total_bytes
  534. if len(mvb) <= chunk_left:
  535. n = self._safe_readinto(mvb)
  536. self.chunk_left = chunk_left - n
  537. return total_bytes + n
  538. temp_mvb = mvb[:chunk_left]
  539. n = self._safe_readinto(temp_mvb)
  540. mvb = mvb[n:]
  541. total_bytes += n
  542. self.chunk_left = 0
  543. except IncompleteRead:
  544. raise IncompleteRead(bytes(b[0:total_bytes]))
  545. def _safe_read(self, amt):
  546. """Read the number of bytes requested.
  547. This function should be used when <amt> bytes "should" be present for
  548. reading. If the bytes are truly not available (due to EOF), then the
  549. IncompleteRead exception can be used to detect the problem.
  550. """
  551. data = self.fp.read(amt)
  552. if len(data) < amt:
  553. raise IncompleteRead(data, amt-len(data))
  554. return data
  555. def _safe_readinto(self, b):
  556. """Same as _safe_read, but for reading into a buffer."""
  557. amt = len(b)
  558. n = self.fp.readinto(b)
  559. if n < amt:
  560. raise IncompleteRead(bytes(b[:n]), amt-n)
  561. return n
  562. def read1(self, n=-1):
  563. """Read with at most one underlying system call. If at least one
  564. byte is buffered, return that instead.
  565. """
  566. if self.fp is None or self._method == "HEAD":
  567. return b""
  568. if self.chunked:
  569. return self._read1_chunked(n)
  570. if self.length is not None and (n < 0 or n > self.length):
  571. n = self.length
  572. result = self.fp.read1(n)
  573. if not result and n:
  574. self._close_conn()
  575. elif self.length is not None:
  576. self.length -= len(result)
  577. if not self.length:
  578. self._close_conn()
  579. return result
  580. def peek(self, n=-1):
  581. # Having this enables IOBase.readline() to read more than one
  582. # byte at a time
  583. if self.fp is None or self._method == "HEAD":
  584. return b""
  585. if self.chunked:
  586. return self._peek_chunked(n)
  587. return self.fp.peek(n)
  588. def readline(self, limit=-1):
  589. if self.fp is None or self._method == "HEAD":
  590. return b""
  591. if self.chunked:
  592. # Fallback to IOBase readline which uses peek() and read()
  593. return super().readline(limit)
  594. if self.length is not None and (limit < 0 or limit > self.length):
  595. limit = self.length
  596. result = self.fp.readline(limit)
  597. if not result and limit:
  598. self._close_conn()
  599. elif self.length is not None:
  600. self.length -= len(result)
  601. if not self.length:
  602. self._close_conn()
  603. return result
  604. def _read1_chunked(self, n):
  605. # Strictly speaking, _get_chunk_left() may cause more than one read,
  606. # but that is ok, since that is to satisfy the chunked protocol.
  607. chunk_left = self._get_chunk_left()
  608. if chunk_left is None or n == 0:
  609. return b''
  610. if not (0 <= n <= chunk_left):
  611. n = chunk_left # if n is negative or larger than chunk_left
  612. read = self.fp.read1(n)
  613. self.chunk_left -= len(read)
  614. if not read:
  615. raise IncompleteRead(b"")
  616. return read
  617. def _peek_chunked(self, n):
  618. # Strictly speaking, _get_chunk_left() may cause more than one read,
  619. # but that is ok, since that is to satisfy the chunked protocol.
  620. try:
  621. chunk_left = self._get_chunk_left()
  622. except IncompleteRead:
  623. return b'' # peek doesn't worry about protocol
  624. if chunk_left is None:
  625. return b'' # eof
  626. # peek is allowed to return more than requested. Just request the
  627. # entire chunk, and truncate what we get.
  628. return self.fp.peek(chunk_left)[:chunk_left]
  629. def fileno(self):
  630. return self.fp.fileno()
  631. def getheader(self, name, default=None):
  632. '''Returns the value of the header matching *name*.
  633. If there are multiple matching headers, the values are
  634. combined into a single string separated by commas and spaces.
  635. If no matching header is found, returns *default* or None if
  636. the *default* is not specified.
  637. If the headers are unknown, raises http.client.ResponseNotReady.
  638. '''
  639. if self.headers is None:
  640. raise ResponseNotReady()
  641. headers = self.headers.get_all(name) or default
  642. if isinstance(headers, str) or not hasattr(headers, '__iter__'):
  643. return headers
  644. else:
  645. return ', '.join(headers)
  646. def getheaders(self):
  647. """Return list of (header, value) tuples."""
  648. if self.headers is None:
  649. raise ResponseNotReady()
  650. return list(self.headers.items())
  651. # We override IOBase.__iter__ so that it doesn't check for closed-ness
  652. def __iter__(self):
  653. return self
  654. # For compatibility with old-style urllib responses.
  655. def info(self):
  656. '''Returns an instance of the class mimetools.Message containing
  657. meta-information associated with the URL.
  658. When the method is HTTP, these headers are those returned by
  659. the server at the head of the retrieved HTML page (including
  660. Content-Length and Content-Type).
  661. When the method is FTP, a Content-Length header will be
  662. present if (as is now usual) the server passed back a file
  663. length in response to the FTP retrieval request. A
  664. Content-Type header will be present if the MIME type can be
  665. guessed.
  666. When the method is local-file, returned headers will include
  667. a Date representing the file's last-modified time, a
  668. Content-Length giving file size, and a Content-Type
  669. containing a guess at the file's type. See also the
  670. description of the mimetools module.
  671. '''
  672. return self.headers
    def geturl(self):
        '''Return the real URL of the page (the value of self.url).

        In some cases, the HTTP server redirects a client to another
        URL. The urlopen() function handles this transparently, but in
        some cases the caller needs to know which URL the client was
        redirected to. The geturl() method can be used to get at this
        redirected URL.
        '''
        return self.url
    def getcode(self):
        '''Return the HTTP status code that was sent with the response,
        or None if the URL is not an HTTP URL.

        The value is read directly from self.status.
        '''
        return self.status
  687. def _create_https_context(http_version):
  688. # Function also used by urllib.request to be able to set the check_hostname
  689. # attribute on a context object.
  690. context = ssl._create_default_https_context()
  691. # send ALPN extension to indicate HTTP/1.1 protocol
  692. if http_version == 11:
  693. context.set_alpn_protocols(['http/1.1'])
  694. # enable PHA for TLS 1.3 connections if available
  695. if context.post_handshake_auth is not None:
  696. context.post_handshake_auth = True
  697. return context
  698. class HTTPConnection:
  699. _http_vsn = 11
  700. _http_vsn_str = 'HTTP/1.1'
  701. response_class = HTTPResponse
  702. default_port = HTTP_PORT
  703. auto_open = 1
  704. debuglevel = 0
    @staticmethod
    def _is_textIO(stream):
        """Test whether a file-like object is a text or a binary stream.

        Returns True when *stream* is an instance of io.TextIOBase,
        False otherwise.
        """
        return isinstance(stream, io.TextIOBase)
  710. @staticmethod
  711. def _get_content_length(body, method):
  712. """Get the content-length based on the body.
  713. If the body is None, we set Content-Length: 0 for methods that expect
  714. a body (RFC 7230, Section 3.3.2). We also set the Content-Length for
  715. any method if the body is a str or bytes-like object and not a file.
  716. """
  717. if body is None:
  718. # do an explicit check for not None here to distinguish
  719. # between unset and set but empty
  720. if method.upper() in _METHODS_EXPECTING_BODY:
  721. return 0
  722. else:
  723. return None
  724. if hasattr(body, 'read'):
  725. # file-like object.
  726. return None
  727. try:
  728. # does it implement the buffer protocol (bytes, bytearray, array)?
  729. mv = memoryview(body)
  730. return mv.nbytes
  731. except TypeError:
  732. pass
  733. if isinstance(body, str):
  734. return len(body)
  735. return None
  736. def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
  737. source_address=None, blocksize=8192):
  738. self.timeout = timeout
  739. self.source_address = source_address
  740. self.blocksize = blocksize
  741. self.sock = None
  742. self._buffer = []
  743. self.__response = None
  744. self.__state = _CS_IDLE
  745. self._method = None
  746. self._tunnel_host = None
  747. self._tunnel_port = None
  748. self._tunnel_headers = {}
  749. self._raw_proxy_headers = None
  750. (self.host, self.port) = self._get_hostport(host, port)
  751. self._validate_host(self.host)
  752. # This is stored as an instance variable to allow unit
  753. # tests to replace it with a suitable mockup
  754. self._create_connection = socket.create_connection
  755. def set_tunnel(self, host, port=None, headers=None):
  756. """Set up host and port for HTTP CONNECT tunnelling.
  757. In a connection that uses HTTP CONNECT tunnelling, the host passed to
  758. the constructor is used as a proxy server that relays all communication
  759. to the endpoint passed to `set_tunnel`. This done by sending an HTTP
  760. CONNECT request to the proxy server when the connection is established.
  761. This method must be called before the HTTP connection has been
  762. established.
  763. The headers argument should be a mapping of extra HTTP headers to send
  764. with the CONNECT request.
  765. As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC
  766. (https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host:
  767. header must be provided, matching the authority-form of the request
  768. target provided as the destination for the CONNECT request. If a
  769. HTTP Host: header is not provided via the headers argument, one
  770. is generated and transmitted automatically.
  771. """
  772. if self.sock:
  773. raise RuntimeError("Can't set up tunnel for established connection")
  774. self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
  775. if headers:
  776. self._tunnel_headers = headers.copy()
  777. else:
  778. self._tunnel_headers.clear()
  779. if not any(header.lower() == "host" for header in self._tunnel_headers):
  780. encoded_host = self._tunnel_host.encode("idna").decode("ascii")
  781. self._tunnel_headers["Host"] = "%s:%d" % (
  782. encoded_host, self._tunnel_port)
  783. def _get_hostport(self, host, port):
  784. if port is None:
  785. i = host.rfind(':')
  786. j = host.rfind(']') # ipv6 addresses have [...]
  787. if i > j:
  788. try:
  789. port = int(host[i+1:])
  790. except ValueError:
  791. if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
  792. port = self.default_port
  793. else:
  794. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
  795. host = host[:i]
  796. else:
  797. port = self.default_port
  798. if host and host[0] == '[' and host[-1] == ']':
  799. host = host[1:-1]
  800. return (host, port)
    def set_debuglevel(self, level):
        # Stores *level* on the instance; other methods print diagnostics
        # when self.debuglevel is greater than 0.
        self.debuglevel = level
  803. def _wrap_ipv6(self, ip):
  804. if b':' in ip and ip[0] != b'['[0]:
  805. return b"[" + ip + b"]"
  806. return ip
  807. def _tunnel(self):
  808. connect = b"CONNECT %s:%d %s\r\n" % (
  809. self._wrap_ipv6(self._tunnel_host.encode("idna")),
  810. self._tunnel_port,
  811. self._http_vsn_str.encode("ascii"))
  812. headers = [connect]
  813. for header, value in self._tunnel_headers.items():
  814. headers.append(f"{header}: {value}\r\n".encode("latin-1"))
  815. headers.append(b"\r\n")
  816. # Making a single send() call instead of one per line encourages
  817. # the host OS to use a more optimal packet size instead of
  818. # potentially emitting a series of small packets.
  819. self.send(b"".join(headers))
  820. del headers
  821. response = self.response_class(self.sock, method=self._method)
  822. try:
  823. (version, code, message) = response._read_status()
  824. self._raw_proxy_headers = _read_headers(response.fp)
  825. if self.debuglevel > 0:
  826. for header in self._raw_proxy_headers:
  827. print('header:', header.decode())
  828. if code != http.HTTPStatus.OK:
  829. self.close()
  830. raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
  831. finally:
  832. response.close()
  833. def get_proxy_response_headers(self):
  834. """
  835. Returns a dictionary with the headers of the response
  836. received from the proxy server to the CONNECT request
  837. sent to set the tunnel.
  838. If the CONNECT request was not sent, the method returns None.
  839. """
  840. return (
  841. _parse_header_lines(self._raw_proxy_headers)
  842. if self._raw_proxy_headers is not None
  843. else None
  844. )
    def connect(self):
        """Connect to the host and port specified in __init__.

        Raises the "http.client.connect" audit event, opens the socket
        through self._create_connection, tries to enable TCP_NODELAY and,
        when a tunnel host is configured, performs the CONNECT handshake.
        """
        sys.audit("http.client.connect", self, self.host, self.port)
        self.sock = self._create_connection(
            (self.host,self.port), self.timeout, self.source_address)
        # Might fail in OSs that don't implement TCP_NODELAY
        try:
            self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        except OSError as e:
            # Only "protocol option not available" is tolerated; anything
            # else is propagated.
            if e.errno != errno.ENOPROTOOPT:
                raise

        if self._tunnel_host:
            self._tunnel()
    def close(self):
        """Close the connection to the HTTP server.

        Resets the request state to idle, closes the socket if there is
        one, and closes the retained response if there is one.
        """
        self.__state = _CS_IDLE
        try:
            sock = self.sock
            if sock:
                # Drop our reference before closing.
                self.sock = None
                sock.close()   # close it manually... there may be other refs
        finally:
            # Runs even if closing the socket raised, so the retained
            # response is always released.
            response = self.__response
            if response:
                self.__response = None
                response.close()
    def send(self, data):
        """Send `data` to the server.

        ``data`` can be a string object, a bytes object, an array object, a
        file-like object that supports a .read() method, or an iterable object.

        If no socket is open, one is opened first when auto_open is true;
        otherwise NotConnected is raised.

        NOTE(review): the str case relies on how the socket's sendall()
        treats str -- confirm whether a plain str is really accepted.
        """

        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        if self.debuglevel > 0:
            print("send:", repr(data))
        if hasattr(data, "read") :
            # File-like object: stream it in blocksize pieces.
            if self.debuglevel > 0:
                print("sending a readable")
            encode = self._is_textIO(data)
            if encode and self.debuglevel > 0:
                print("encoding file using iso-8859-1")
            while datablock := data.read(self.blocksize):
                if encode:
                    datablock = datablock.encode("iso-8859-1")
                # One audit event per block actually written.
                sys.audit("http.client.send", self, datablock)
                self.sock.sendall(datablock)
            return
        sys.audit("http.client.send", self, data)
        try:
            self.sock.sendall(data)
        except TypeError:
            # sendall() rejected the object itself; fall back to sending
            # its items one at a time if it is iterable.
            if isinstance(data, collections.abc.Iterable):
                for d in data:
                    self.sock.sendall(d)
            else:
                raise TypeError("data should be a bytes-like object "
                                "or an iterable, got %r" % type(data))
    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.  The line is only
        appended to self._buffer; nothing is sent here.
        """
        self._buffer.append(s)
  910. def _read_readable(self, readable):
  911. if self.debuglevel > 0:
  912. print("reading a readable")
  913. encode = self._is_textIO(readable)
  914. if encode and self.debuglevel > 0:
  915. print("encoding file using iso-8859-1")
  916. while datablock := readable.read(self.blocksize):
  917. if encode:
  918. datablock = datablock.encode("iso-8859-1")
  919. yield datablock
    def _send_output(self, message_body=None, encode_chunked=False):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        A message_body may be specified, to be appended to the request.
        It may be a file-like object, a buffer-protocol object or an
        iterable of chunks; anything else raises TypeError.  With
        encode_chunked true on an HTTP/1.1 connection, every non-empty
        chunk is framed for chunked transfer and a terminating zero-length
        chunk is sent at the end.
        """
        # Two empty items make the join below end in a blank line.
        self._buffer.extend((b"", b""))
        msg = b"\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

        if message_body is not None:

            # create a consistent interface to message_body
            if hasattr(message_body, 'read'):
                # Let file-like take precedence over byte-like.  This
                # is needed to allow the current position of mmap'ed
                # files to be taken into account.
                chunks = self._read_readable(message_body)
            else:
                try:
                    # this is solely to check to see if message_body
                    # implements the buffer API.  it /would/ be easier
                    # to capture if PyObject_CheckBuffer was exposed
                    # to Python.
                    memoryview(message_body)
                except TypeError:
                    try:
                        chunks = iter(message_body)
                    except TypeError:
                        raise TypeError("message_body should be a bytes-like "
                                        "object or an iterable, got %r"
                                        % type(message_body))
                else:
                    # the object implements the buffer interface and
                    # can be passed directly into socket methods
                    chunks = (message_body,)

            for chunk in chunks:
                if not chunk:
                    # An empty chunk is skipped entirely, so it can never
                    # be framed as the zero-length terminator below.
                    if self.debuglevel > 0:
                        print('Zero length chunk ignored')
                    continue

                if encode_chunked and self._http_vsn == 11:
                    # chunked encoding
                    chunk = f'{len(chunk):X}\r\n'.encode('ascii') + chunk \
                        + b'\r\n'
                self.send(chunk)

            if encode_chunked and self._http_vsn == 11:
                # end chunked transfer
                self.send(b'0\r\n\r\n')
    def putrequest(self, method, url, skip_host=False,
                   skip_accept_encoding=False):
        """Send a request to the server.

        `method` specifies an HTTP request method, e.g. 'GET'.
        `url` specifies the object being requested, e.g. '/index.html';
        an empty value is replaced by '/'.
        `skip_host` if True does not add automatically a 'Host:' header
        `skip_accept_encoding` if True does not add automatically an
           'Accept-Encoding:' header

        The request line and the automatic headers are only buffered here.
        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None


        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest(self.__state)

        self._validate_method(method)

        # Save the method for use later in the response phase
        self._method = method

        url = url or '/'
        self._validate_path(url)

        request = '%s %s %s' % (method, url, self._http_vsn_str)

        self._output(self._encode_request(request))

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    nil, netloc, nil, nil, nil = urlsplit(url)

                if netloc:
                    # Absolute URL: the Host header comes from the URL.
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', _strip_ipv6_iface(netloc_enc))
                else:
                    # Otherwise use the tunnel endpoint if one is set,
                    # else the host this connection was created for.
                    if self._tunnel_host:
                        host = self._tunnel_host
                        port = self._tunnel_port
                    else:
                        host = self.host
                        port = self.port

                    try:
                        host_enc = host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = host.encode("idna")

                    # As per RFC 2732, IPv6 address should be wrapped with []
                    # when used as Host header
                    host_enc = self._wrap_ipv6(host_enc)
                    if ":" in host:
                        host_enc = _strip_ipv6_iface(host_enc)

                    if port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        host_enc = host_enc.decode("ascii")
                        self.putheader('Host', "%s:%s" % (host_enc, port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass
    def _encode_request(self, request):
        # Encodes the request line as ASCII bytes; non-ASCII text raises
        # UnicodeEncodeError.
        # ASCII also helps prevent CVE-2019-9740.
        return request.encode('ascii')
  1074. def _validate_method(self, method):
  1075. """Validate a method name for putrequest."""
  1076. # prevent http header injection
  1077. match = _contains_disallowed_method_pchar_re.search(method)
  1078. if match:
  1079. raise ValueError(
  1080. f"method can't contain control characters. {method!r} "
  1081. f"(found at least {match.group()!r})")
  1082. def _validate_path(self, url):
  1083. """Validate a url for putrequest."""
  1084. # Prevent CVE-2019-9740.
  1085. match = _contains_disallowed_url_pchar_re.search(url)
  1086. if match:
  1087. raise InvalidURL(f"URL can't contain control characters. {url!r} "
  1088. f"(found at least {match.group()!r})")
  1089. def _validate_host(self, host):
  1090. """Validate a host so it doesn't contain control characters."""
  1091. # Prevent CVE-2019-18348.
  1092. match = _contains_disallowed_url_pchar_re.search(host)
  1093. if match:
  1094. raise InvalidURL(f"URL can't contain control characters. {host!r} "
  1095. f"(found at least {match.group()!r})")
  1096. def putheader(self, header, *values):
  1097. """Send a request header line to the server.
  1098. For example: h.putheader('Accept', 'text/html')
  1099. """
  1100. if self.__state != _CS_REQ_STARTED:
  1101. raise CannotSendHeader()
  1102. if hasattr(header, 'encode'):
  1103. header = header.encode('ascii')
  1104. if not _is_legal_header_name(header):
  1105. raise ValueError('Invalid header name %r' % (header,))
  1106. values = list(values)
  1107. for i, one_value in enumerate(values):
  1108. if hasattr(one_value, 'encode'):
  1109. values[i] = one_value.encode('latin-1')
  1110. elif isinstance(one_value, int):
  1111. values[i] = str(one_value).encode('ascii')
  1112. if _is_illegal_header_value(values[i]):
  1113. raise ValueError('Invalid header value %r' % (values[i],))
  1114. value = b'\r\n\t'.join(values)
  1115. header = header + b': ' + value
  1116. self._output(header)
  1117. def endheaders(self, message_body=None, *, encode_chunked=False):
  1118. """Indicate that the last header line has been sent to the server.
  1119. This method sends the request to the server. The optional message_body
  1120. argument can be used to pass a message body associated with the
  1121. request.
  1122. """
  1123. if self.__state == _CS_REQ_STARTED:
  1124. self.__state = _CS_REQ_SENT
  1125. else:
  1126. raise CannotSendHeader()
  1127. self._send_output(message_body, encode_chunked=encode_chunked)
    def request(self, method, url, body=None, headers={}, *,
                encode_chunked=False):
        """Send a complete request to the server.

        All arguments are passed unchanged to _send_request().
        """
        self._send_request(method, url, body, headers, encode_chunked)
    def _send_request(self, method, url, body, headers, encode_chunked):
        # Emits the request line, a framing header (Content-Length or
        # Transfer-Encoding) when the caller supplied neither, the caller's
        # headers, and finally the body via endheaders().

        # Honor explicitly requested Host: and Accept-Encoding: headers.
        header_names = frozenset(k.lower() for k in headers)
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1

        self.putrequest(method, url, **skips)

        # chunked encoding will happen if HTTP/1.1 is used and either
        # the caller passes encode_chunked=True or the following
        # conditions hold:
        # 1. content-length has not been explicitly set
        # 2. the body is a file or iterable, but not a str or bytes-like
        # 3. Transfer-Encoding has NOT been explicitly set by the caller

        if 'content-length' not in header_names:
            # only chunk body if not explicitly set for backwards
            # compatibility, assuming the client code is already handling the
            # chunking
            if 'transfer-encoding' not in header_names:
                # if content-length cannot be automatically determined, fall
                # back to chunked encoding
                encode_chunked = False
                content_length = self._get_content_length(body, method)
                if content_length is None:
                    if body is not None:
                        if self.debuglevel > 0:
                            print('Unable to determine size of %r' % body)
                        encode_chunked = True
                        self.putheader('Transfer-Encoding', 'chunked')
                else:
                    self.putheader('Content-Length', str(content_length))
        else:
            # An explicit Content-Length always disables chunking.
            encode_chunked = False

        for hdr, value in headers.items():
            self.putheader(hdr, value)
        if isinstance(body, str):
            # RFC 2616 Section 3.7.1 says that text default has a
            # default charset of iso-8859-1.
            body = _encode(body, 'body')
        self.endheaders(body, encode_chunked=encode_chunked)
    def getresponse(self):
        """Get the response from the server.

        If the HTTPConnection is in the correct state, returns an
        instance of HTTPResponse or of whatever object is returned by
        the response_class variable.

        If a request has not been sent or if a previous response has
        not been handled, ResponseNotReady is raised.  If the HTTP
        response indicates that the connection should be closed, then
        it will be closed before the response is returned.  When the
        connection is closed, the underlying socket is closed.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady(self.__state)

        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, method=self._method)

        try:
            try:
                response.begin()
            except ConnectionError:
                # Connection-level failure: drop the connection as well
                # before propagating.
                self.close()
                raise
            assert response.will_close != _UNKNOWN
            self.__state = _CS_IDLE

            if response.will_close:
                # this effectively passes the connection to the response
                self.close()
            else:
                # remember this, so we can tell when it is complete
                self.__response = response

            return response
        except:
            # Deliberately bare: whatever went wrong, release the response
            # and re-raise unchanged.
            response.close()
            raise
  1227. try:
  1228. import ssl
  1229. except ImportError:
  1230. pass
  1231. else:
  1232. class HTTPSConnection(HTTPConnection):
  1233. "This class allows communication via SSL."
  1234. default_port = HTTPS_PORT
  1235. def __init__(self, host, port=None,
  1236. *, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
  1237. source_address=None, context=None, blocksize=8192):
  1238. super(HTTPSConnection, self).__init__(host, port, timeout,
  1239. source_address,
  1240. blocksize=blocksize)
  1241. if context is None:
  1242. context = _create_https_context(self._http_vsn)
  1243. self._context = context
  1244. def connect(self):
  1245. "Connect to a host on a given (SSL) port."
  1246. super().connect()
  1247. if self._tunnel_host:
  1248. server_hostname = self._tunnel_host
  1249. else:
  1250. server_hostname = self.host
  1251. self.sock = self._context.wrap_socket(self.sock,
  1252. server_hostname=server_hostname)
  1253. __all__.append("HTTPSConnection")
class HTTPException(Exception):
    # Base class of the exceptions defined in this module.
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
    pass
class NotConnected(HTTPException):
    # Raised by HTTPConnection.send() when there is no socket and
    # auto_open is false.
    pass
class InvalidURL(HTTPException):
    # Raised for a non-numeric port in the host string and for hosts or
    # request paths containing disallowed characters.
    pass
  1262. class UnknownProtocol(HTTPException):
  1263. def __init__(self, version):
  1264. self.args = version,
  1265. self.version = version
class UnknownTransferEncoding(HTTPException):
    # NOTE(review): presumably signals an unsupported Transfer-Encoding;
    # no raise site is visible in this part of the file.
    pass
class UnimplementedFileMode(HTTPException):
    # NOTE(review): no raise site is visible in this part of the file;
    # confirm whether this is kept only for backwards compatibility.
    pass
  1270. class IncompleteRead(HTTPException):
  1271. def __init__(self, partial, expected=None):
  1272. self.args = partial,
  1273. self.partial = partial
  1274. self.expected = expected
  1275. def __repr__(self):
  1276. if self.expected is not None:
  1277. e = ', %i more expected' % self.expected
  1278. else:
  1279. e = ''
  1280. return '%s(%i bytes read%s)' % (self.__class__.__name__,
  1281. len(self.partial), e)
  1282. __str__ = object.__str__
class ImproperConnectionState(HTTPException):
    # Common base of CannotSendRequest, CannotSendHeader and
    # ResponseNotReady.
    pass
class CannotSendRequest(ImproperConnectionState):
    # Raised by HTTPConnection.putrequest() when the connection is not idle.
    pass
class CannotSendHeader(ImproperConnectionState):
    # Raised by HTTPConnection.putheader() and endheaders() when no
    # request has been started.
    pass
class ResponseNotReady(ImproperConnectionState):
    # Raised by HTTPConnection.getresponse() when no request has been sent
    # or a prior response is still pending, and by the response's header
    # accessors while the headers are still None.
    pass
  1291. class BadStatusLine(HTTPException):
  1292. def __init__(self, line):
  1293. if not line:
  1294. line = repr(line)
  1295. self.args = line,
  1296. self.line = line
  1297. class LineTooLong(HTTPException):
  1298. def __init__(self, line_type):
  1299. HTTPException.__init__(self, "got more than %d bytes when reading %s"
  1300. % (_MAXLINE, line_type))
class RemoteDisconnected(ConnectionResetError, BadStatusLine):
    # Both a ConnectionResetError and a BadStatusLine, so it is caught by
    # handlers written for either.
    def __init__(self, *pos, **kw):
        # BadStatusLine is initialised with an empty line first; the
        # ConnectionResetError initialiser then receives the caller's
        # arguments.
        BadStatusLine.__init__(self, "")
        ConnectionResetError.__init__(self, *pos, **kw)
# for backwards compatibility: "error" is an alias of HTTPException
error = HTTPException