http_parser.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956
  1. import abc
  2. import asyncio
  3. import collections
  4. import re
  5. import string
  6. import zlib
  7. from contextlib import suppress
  8. from enum import IntEnum
  9. from typing import (
  10. Any,
  11. Generic,
  12. List,
  13. NamedTuple,
  14. Optional,
  15. Pattern,
  16. Set,
  17. Tuple,
  18. Type,
  19. TypeVar,
  20. Union,
  21. cast,
  22. )
  23. from multidict import CIMultiDict, CIMultiDictProxy, istr
  24. from yarl import URL
  25. from . import hdrs
  26. from .base_protocol import BaseProtocol
  27. from .helpers import NO_EXTENSIONS, BaseTimerContext
  28. from .http_exceptions import (
  29. BadHttpMessage,
  30. BadStatusLine,
  31. ContentEncodingError,
  32. ContentLengthError,
  33. InvalidHeader,
  34. LineTooLong,
  35. TransferEncodingError,
  36. )
  37. from .http_writer import HttpVersion, HttpVersion10
  38. from .log import internal_logger
  39. from .streams import EMPTY_PAYLOAD, StreamReader
  40. from .typedefs import Final, RawHeaders
  41. try:
  42. import brotli
  43. HAS_BROTLI = True
  44. except ImportError: # pragma: no cover
  45. HAS_BROTLI = False
  46. __all__ = (
  47. "HeadersParser",
  48. "HttpParser",
  49. "HttpRequestParser",
  50. "HttpResponseParser",
  51. "RawRequestMessage",
  52. "RawResponseMessage",
  53. )
  54. ASCIISET: Final[Set[str]] = set(string.printable)
  55. # See https://tools.ietf.org/html/rfc7230#section-3.1.1
  56. # and https://tools.ietf.org/html/rfc7230#appendix-B
  57. #
  58. # method = token
  59. # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  60. # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  61. # token = 1*tchar
  62. METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
  63. VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
  64. HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
class RawRequestMessage(NamedTuple):
    """Immutable result of parsing an HTTP request line and its headers."""

    method: str  # request method token, e.g. "GET"
    path: str  # request target exactly as received (before URL parsing)
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"  # case-insensitive, decoded header view
    raw_headers: RawHeaders  # original (name, value) byte pairs
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade was requested
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # parsed form of `path`
  76. RawResponseMessage = collections.namedtuple(
  77. "RawResponseMessage",
  78. [
  79. "version",
  80. "code",
  81. "reason",
  82. "headers",
  83. "raw_headers",
  84. "should_close",
  85. "compression",
  86. "upgrade",
  87. "chunked",
  88. ],
  89. )
  90. _MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the message body is framed: none, Content-Length, chunked, or EOF."""

    PARSE_NONE = 0
    PARSE_LENGTH = 1
    PARSE_CHUNKED = 2
    PARSE_UNTIL_EOF = 3
class ChunkState(IntEnum):
    """Sub-states of the chunked Transfer-Encoding parser."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]\r\n"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk body bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the CRLF after a chunk body
    PARSE_MAYBE_TRAILERS = 3  # after the 0-size chunk: CRLF or trailers
    PARSE_TRAILERS = 4  # discarding a trailer line up to CRLF
class HeadersParser:
    """Parse raw header lines into a case-insensitive mapping + raw byte pairs."""

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        # Size limits guard against maliciously large headers.
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse *lines* (index 0 is the start-line; headers begin at index 1).

        Supports obsolete line folding (continuation lines starting with
        SP/HTAB).  Raises InvalidHeader for malformed names and LineTooLong
        when a name or folded value exceeds ``max_field_size``.
        """
        headers = CIMultiDict()  # type: CIMultiDict[str]
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        # An empty line terminates the header section.
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            bname = bname.strip(b" \t")
            bvalue = bvalue.lstrip()
            if HDRRE.search(bname):
                # Forbidden byte (CTL or separator) in the field name.
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "xmlcharrefreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "xmlcharrefreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "xmlcharrefreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip()
            # surrogateescape keeps non-UTF-8 bytes round-trippable.
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")
            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
  182. class HttpParser(abc.ABC, Generic[_MsgT]):
  183. def __init__(
  184. self,
  185. protocol: Optional[BaseProtocol] = None,
  186. loop: Optional[asyncio.AbstractEventLoop] = None,
  187. limit: int = 2 ** 16,
  188. max_line_size: int = 8190,
  189. max_headers: int = 32768,
  190. max_field_size: int = 8190,
  191. timer: Optional[BaseTimerContext] = None,
  192. code: Optional[int] = None,
  193. method: Optional[str] = None,
  194. readall: bool = False,
  195. payload_exception: Optional[Type[BaseException]] = None,
  196. response_with_body: bool = True,
  197. read_until_eof: bool = False,
  198. auto_decompress: bool = True,
  199. ) -> None:
  200. self.protocol = protocol
  201. self.loop = loop
  202. self.max_line_size = max_line_size
  203. self.max_headers = max_headers
  204. self.max_field_size = max_field_size
  205. self.timer = timer
  206. self.code = code
  207. self.method = method
  208. self.readall = readall
  209. self.payload_exception = payload_exception
  210. self.response_with_body = response_with_body
  211. self.read_until_eof = read_until_eof
  212. self._lines = [] # type: List[bytes]
  213. self._tail = b""
  214. self._upgraded = False
  215. self._payload = None
  216. self._payload_parser = None # type: Optional[HttpPayloadParser]
  217. self._auto_decompress = auto_decompress
  218. self._limit = limit
  219. self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)
  220. @abc.abstractmethod
  221. def parse_message(self, lines: List[bytes]) -> _MsgT:
  222. pass
  223. def feed_eof(self) -> Optional[_MsgT]:
  224. if self._payload_parser is not None:
  225. self._payload_parser.feed_eof()
  226. self._payload_parser = None
  227. else:
  228. # try to extract partial message
  229. if self._tail:
  230. self._lines.append(self._tail)
  231. if self._lines:
  232. if self._lines[-1] != "\r\n":
  233. self._lines.append(b"")
  234. with suppress(Exception):
  235. return self.parse_message(self._lines)
  236. return None
  237. def feed_data(
  238. self,
  239. data: bytes,
  240. SEP: bytes = b"\r\n",
  241. EMPTY: bytes = b"",
  242. CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
  243. METH_CONNECT: str = hdrs.METH_CONNECT,
  244. SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
  245. ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
  246. messages = []
  247. if self._tail:
  248. data, self._tail = self._tail + data, b""
  249. data_len = len(data)
  250. start_pos = 0
  251. loop = self.loop
  252. while start_pos < data_len:
  253. # read HTTP message (request/response line + headers), \r\n\r\n
  254. # and split by lines
  255. if self._payload_parser is None and not self._upgraded:
  256. pos = data.find(SEP, start_pos)
  257. # consume \r\n
  258. if pos == start_pos and not self._lines:
  259. start_pos = pos + 2
  260. continue
  261. if pos >= start_pos:
  262. # line found
  263. self._lines.append(data[start_pos:pos])
  264. start_pos = pos + 2
  265. # \r\n\r\n found
  266. if self._lines[-1] == EMPTY:
  267. try:
  268. msg: _MsgT = self.parse_message(self._lines)
  269. finally:
  270. self._lines.clear()
  271. def get_content_length() -> Optional[int]:
  272. # payload length
  273. length_hdr = msg.headers.get(CONTENT_LENGTH)
  274. if length_hdr is None:
  275. return None
  276. try:
  277. length = int(length_hdr)
  278. except ValueError:
  279. raise InvalidHeader(CONTENT_LENGTH)
  280. if length < 0:
  281. raise InvalidHeader(CONTENT_LENGTH)
  282. return length
  283. length = get_content_length()
  284. # do not support old websocket spec
  285. if SEC_WEBSOCKET_KEY1 in msg.headers:
  286. raise InvalidHeader(SEC_WEBSOCKET_KEY1)
  287. self._upgraded = msg.upgrade
  288. method = getattr(msg, "method", self.method)
  289. assert self.protocol is not None
  290. # calculate payload
  291. if (
  292. (length is not None and length > 0)
  293. or msg.chunked
  294. and not msg.upgrade
  295. ):
  296. payload = StreamReader(
  297. self.protocol,
  298. timer=self.timer,
  299. loop=loop,
  300. limit=self._limit,
  301. )
  302. payload_parser = HttpPayloadParser(
  303. payload,
  304. length=length,
  305. chunked=msg.chunked,
  306. method=method,
  307. compression=msg.compression,
  308. code=self.code,
  309. readall=self.readall,
  310. response_with_body=self.response_with_body,
  311. auto_decompress=self._auto_decompress,
  312. )
  313. if not payload_parser.done:
  314. self._payload_parser = payload_parser
  315. elif method == METH_CONNECT:
  316. assert isinstance(msg, RawRequestMessage)
  317. payload = StreamReader(
  318. self.protocol,
  319. timer=self.timer,
  320. loop=loop,
  321. limit=self._limit,
  322. )
  323. self._upgraded = True
  324. self._payload_parser = HttpPayloadParser(
  325. payload,
  326. method=msg.method,
  327. compression=msg.compression,
  328. readall=True,
  329. auto_decompress=self._auto_decompress,
  330. )
  331. else:
  332. if (
  333. getattr(msg, "code", 100) >= 199
  334. and length is None
  335. and self.read_until_eof
  336. ):
  337. payload = StreamReader(
  338. self.protocol,
  339. timer=self.timer,
  340. loop=loop,
  341. limit=self._limit,
  342. )
  343. payload_parser = HttpPayloadParser(
  344. payload,
  345. length=length,
  346. chunked=msg.chunked,
  347. method=method,
  348. compression=msg.compression,
  349. code=self.code,
  350. readall=True,
  351. response_with_body=self.response_with_body,
  352. auto_decompress=self._auto_decompress,
  353. )
  354. if not payload_parser.done:
  355. self._payload_parser = payload_parser
  356. else:
  357. payload = EMPTY_PAYLOAD
  358. messages.append((msg, payload))
  359. else:
  360. self._tail = data[start_pos:]
  361. data = EMPTY
  362. break
  363. # no parser, just store
  364. elif self._payload_parser is None and self._upgraded:
  365. assert not self._lines
  366. break
  367. # feed payload
  368. elif data and start_pos < data_len:
  369. assert not self._lines
  370. assert self._payload_parser is not None
  371. try:
  372. eof, data = self._payload_parser.feed_data(data[start_pos:])
  373. except BaseException as exc:
  374. if self.payload_exception is not None:
  375. self._payload_parser.payload.set_exception(
  376. self.payload_exception(str(exc))
  377. )
  378. else:
  379. self._payload_parser.payload.set_exception(exc)
  380. eof = True
  381. data = b""
  382. if eof:
  383. start_pos = 0
  384. data_len = len(data)
  385. self._payload_parser = None
  386. continue
  387. else:
  388. break
  389. if data and start_pos < data_len:
  390. data = data[start_pos:]
  391. else:
  392. data = EMPTY
  393. return messages, self._upgraded, data
  394. def parse_headers(
  395. self, lines: List[bytes]
  396. ) -> Tuple[
  397. "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
  398. ]:
  399. """Parses RFC 5322 headers from a stream.
  400. Line continuations are supported. Returns list of header name
  401. and value pairs. Header name is in upper case.
  402. """
  403. headers, raw_headers = self._headers_parser.parse_headers(lines)
  404. close_conn = None
  405. encoding = None
  406. upgrade = False
  407. chunked = False
  408. # keep-alive
  409. conn = headers.get(hdrs.CONNECTION)
  410. if conn:
  411. v = conn.lower()
  412. if v == "close":
  413. close_conn = True
  414. elif v == "keep-alive":
  415. close_conn = False
  416. elif v == "upgrade":
  417. upgrade = True
  418. # encoding
  419. enc = headers.get(hdrs.CONTENT_ENCODING)
  420. if enc:
  421. enc = enc.lower()
  422. if enc in ("gzip", "deflate", "br"):
  423. encoding = enc
  424. # chunking
  425. te = headers.get(hdrs.TRANSFER_ENCODING)
  426. if te is not None:
  427. if "chunked" == te.lower():
  428. chunked = True
  429. else:
  430. raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
  431. if hdrs.CONTENT_LENGTH in headers:
  432. raise BadHttpMessage(
  433. "Content-Length can't be present with Transfer-Encoding",
  434. )
  435. return (headers, raw_headers, close_conn, encoding, upgrade, chunked)
  436. def set_upgraded(self, val: bool) -> None:
  437. """Set connection upgraded (to websocket) mode.
  438. :param bool val: new state.
  439. """
  440. self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(None, 2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # Split the request-target into path / query / fragment pieces so
        # URL.build() below reassembles them the same way the C parser does.
        path_part, _hash_separator, url_fragment = path.partition("#")
        path_part, _question_mark_separator, qs_part = path_part.partition("?")

        # method
        if not METHRE.match(method):
            raise BadStatusLine(method)

        # version
        try:
            if version.startswith("HTTP/"):
                n1, n2 = version[5:].split(".", 1)
                version_o = HttpVersion(int(n1), int(n2))
            else:
                raise BadStatusLine(version)
        except Exception:
            # Any failure above (bad split, non-integer) maps to BadStatusLine.
            raise BadStatusLine(version)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            ),
        )
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(None, 1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(None, 1)
        except ValueError:
            # Reason phrase is optional, e.g. "HTTP/1.1 200".
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.match(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit number
        try:
            status_i = int(status)
        except ValueError:
            raise BadStatusLine(line) from None

        if status_i > 999:
            raise BadStatusLine(line)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # No Connection header: HTTP/1.0 defaults to close, 1.1 to keep-alive.
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )
class HttpPayloadParser:
    """Incremental HTTP body parser feeding decoded data into a stream.

    Framing is chosen from the message headers: fixed Content-Length,
    chunked Transfer-Encoding, or read-until-EOF.  When a Content-Encoding
    is present (and auto_decompress is on) the output stream is wrapped in
    a DeflateBuffer that decompresses on the fly.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
    ) -> None:
        self._length = 0  # remaining bytes for Content-Length framing
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial data held between feed_data calls
        self._auto_decompress = auto_decompress
        self.done = False  # True when no (more) body is expected

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload = DeflateBuffer(
                payload, compression
            )  # type: Union[StreamReader, DeflateBuffer]
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                # No framing info: read until the connection closes
                # (204 No Content never has a body).
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        # EOF is only valid for read-until-EOF framing; for length/chunked
        # framing a premature EOF means the peer truncated the body.
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: bytes = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed *chunk*; return (body-complete, unconsumed-tail)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Prepend data left over from the previous feed.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        try:
                            size = int(bytes(size_b), 16)
                        except ValueError:
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc from None

                        chunk = chunk[pos + 2 :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[:2] == SEP:
                        chunk = chunk[2:]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skiped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[:2]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[2:]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + 2 :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        # NOTE: for PARSE_NONE this falls through and returns None; callers
        # only invoke feed_data when `done` is False, so this path is unused.
        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    # Either a zlib decompressobj or the BrotliDecoder wrapper below.
    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0  # total compressed bytes received
        self.encoding = encoding
        self._started_decoding = False

        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )

            class BrotliDecoder:
                # Supports both 'brotlipy' and 'Brotli' packages
                # since they share an import name. The top branches
                # are for 'brotlipy' and bottom branches for 'Brotli'
                def __init__(self) -> None:
                    self._obj = brotli.Decompressor()

                def decompress(self, data: bytes) -> bytes:
                    if hasattr(self._obj, "decompress"):
                        return cast(bytes, self._obj.decompress(data))
                    return cast(bytes, self._obj.process(data))

                def flush(self) -> bytes:
                    if hasattr(self._obj, "flush"):
                        return cast(bytes, self._obj.flush())
                    return b""

            self.decompressor = BrotliDecoder()
        else:
            # gzip needs the +16 window-bits flag to expect a gzip header.
            zlib_mode = 16 + zlib.MAX_WBITS if encoding == "gzip" else zlib.MAX_WBITS
            self.decompressor = zlib.decompressobj(wbits=zlib_mode)

    def set_exception(self, exc: BaseException) -> None:
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = zlib.decompressobj(wbits=-zlib.MAX_WBITS)

        try:
            chunk = self.decompressor.decompress(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            if self.encoding == "deflate" and not self.decompressor.eof:
                # The deflate stream ended without its end-of-stream marker.
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Keep the pure-Python implementations reachable under *Py aliases, then try
# to shadow the public names with the C-accelerated versions when available.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Extension module not built; fall back to the pure-Python parsers.
    pass