http_parser.h 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. #pragma once
  2. #include <util/generic/string.h>
  3. #include <util/generic/strbuf.h>
  4. #include <util/generic/yexception.h>
  5. #include <util/generic/hash_set.h>
  6. #include <util/string/cast.h>
  7. #include <library/cpp/http/io/stream.h>
  /// HTTP protocol version as a (major, minor) pair.
  /// A default-constructed value is 1.0.
  struct THttpVersion {
      unsigned Major{1}; ///< major version number
      unsigned Minor{0}; ///< minor version number
  };
  // HTTP message (request/response) parser for async/callback architectures (an ugly state machine).
  // Usage: call Parse(...); when it returns 'true', the whole message has been parsed.
  // Extra bytes in the input data (those not belonging to the message) are counted by GetExtraDataSize().
  15. class THttpParser {
  16. public:
  17. enum TMessageType {
  18. Request,
  19. Response
  20. };
  21. THttpParser(TMessageType mt = Response)
  22. : Parser_(&THttpParser::FirstLineParser)
  23. , MessageType_(mt)
  24. {
  25. }
  26. inline void DisableCollectingHeaders() noexcept {
  27. CollectHeaders_ = false;
  28. }
  29. inline void SetGzipAllowMultipleStreams(bool allow) noexcept {
  30. GzipAllowMultipleStreams_ = allow;
  31. }
  32. inline void DisableDecodeContent() noexcept {
  33. DecodeContent_ = false;
  34. }
  35. /*
  36. * Disable message-body parsing.
  37. * Useful for parse HEAD method responses
  38. */
  39. inline void BodyNotExpected() {
  40. BodyNotExpected_ = true;
  41. }
  42. /// @return true on end parsing (GetExtraDataSize() return amount not used bytes)
  43. /// throw exception on bad http format (unsupported encoding, etc)
  44. /// sz == 0 signaling end of input stream
  45. bool Parse(const char* data, size_t sz) {
  46. if (ParseImpl(data, sz)) {
  47. if (DecodeContent_) {
  48. DecodeContent(DecodedContent_);
  49. }
  50. return true;
  51. }
  52. return false;
  53. }
  54. const char* Data() const noexcept {
  55. return Data_;
  56. }
  57. size_t GetExtraDataSize() const noexcept {
  58. return ExtraDataSize_;
  59. }
  60. const TString& FirstLine() const noexcept {
  61. return FirstLine_;
  62. }
  63. unsigned RetCode() const noexcept {
  64. return RetCode_;
  65. }
  66. const THttpVersion& HttpVersion() const noexcept {
  67. return HttpVersion_;
  68. }
  69. const THttpHeaders& Headers() const noexcept {
  70. return Headers_;
  71. }
  72. bool IsKeepAlive() const noexcept {
  73. return KeepAlive_;
  74. }
  75. bool GetContentLength(ui64& value) const noexcept {
  76. if (!HasContentLength_) {
  77. return false;
  78. }
  79. value = ContentLength_;
  80. return true;
  81. }
  82. TString GetBestCompressionScheme() const;
  83. const THashSet<TString>& AcceptedEncodings() const;
  84. const TString& Content() const noexcept {
  85. return Content_;
  86. }
  87. const TString& DecodedContent() const noexcept {
  88. return DecodedContent_;
  89. }
  90. void Prepare() {
  91. HeaderLine_.reserve(128);
  92. FirstLine_.reserve(128);
  93. }
  94. bool DecodeContent(TString& decodedContent) const;
  95. private:
  96. bool ParseImpl(const char* data, size_t sz) {
  97. Data_ = data;
  98. DataEnd_ = data + sz;
  99. if (sz == 0) {
  100. OnEof();
  101. return true;
  102. }
  103. return (this->*Parser_)();
  104. }
  105. // stage parsers
  106. bool FirstLineParser();
  107. bool HeadersParser();
  108. bool ContentParser();
  109. bool ChunkedContentParser();
  110. bool OnEndParsing();
  111. // continue read to CurrentLine_
  112. bool ReadLine();
  113. void ParseHttpVersion(TStringBuf httpVersion);
  114. void ParseHeaderLine();
  115. void OnEof();
  116. void ApplyHeaderLine(const TStringBuf& name, const TStringBuf& val);
  117. typedef bool (THttpParser::*TParser)();
  118. TParser Parser_; //current parser (stage)
  119. TMessageType MessageType_ = Response;
  120. bool CollectHeaders_ = true;
  121. bool GzipAllowMultipleStreams_ = true;
  122. bool DecodeContent_ = true;
  123. bool BodyNotExpected_ = false;
  124. // parsed data
  125. const char* Data_ = nullptr;
  126. const char* DataEnd_ = nullptr;
  127. TString CurrentLine_;
  128. TString HeaderLine_;
  129. size_t ExtraDataSize_ = 0;
  130. // headers
  131. TString FirstLine_;
  132. THttpVersion HttpVersion_;
  133. unsigned RetCode_ = 0;
  134. THttpHeaders Headers_;
  135. bool KeepAlive_ = false;
  136. THashSet<TString> AcceptEncodings_;
  137. TString ContentEncoding_;
  138. bool HasContentLength_ = false;
  139. ui64 ContentLength_ = 0;
  140. struct TChunkInputState {
  141. size_t LeftBytes_ = 0;
  142. bool ReadLastChunk_ = false;
  143. };
  144. TAutoPtr<TChunkInputState> ChunkInputState_;
  145. TString Content_;
  146. TString DecodedContent_;
  147. };