// http_parser.h
  1. #pragma once
  2. #include <util/generic/string.h>
  3. #include <util/generic/strbuf.h>
  4. #include <util/generic/yexception.h>
  5. #include <util/generic/hash_set.h>
  6. #include <util/string/cast.h>
  7. #include <library/cpp/http/io/stream.h>
  /// HTTP protocol version stored as a major/minor pair.
  /// A default-constructed value is 1.0.
  struct THttpVersion {
      unsigned Major{1}; ///< major component, defaults to 1
      unsigned Minor{0}; ///< minor component, defaults to 0
  };
  12. //http requests parser for async/callbacks arch. (uggly state-machine)
  13. //usage, - call Parse(...), if returned 'true' - all message parsed,
  14. //external (non entered in message) bytes in input data counted by GetExtraDataSize()
  15. class THttpParser {
  16. public:
  17. enum TMessageType {
  18. Request,
  19. Response
  20. };
  21. THttpParser(TMessageType mt = Response)
  22. : Parser_(&THttpParser::FirstLineParser)
  23. , MessageType_(mt)
  24. {
  25. }
  26. inline void DisableCollectingHeaders() noexcept {
  27. CollectHeaders_ = false;
  28. }
  29. inline void SetGzipAllowMultipleStreams(bool allow) noexcept {
  30. GzipAllowMultipleStreams_ = allow;
  31. }
  32. inline void DisableDecodeContent() noexcept {
  33. DecodeContent_ = false;
  34. }
  35. /*
  36. * Disable message-body parsing.
  37. * Useful for parse HEAD method responses
  38. */
  39. inline void BodyNotExpected() {
  40. BodyNotExpected_ = true;
  41. }
  42. /// @return true on end parsing (GetExtraDataSize() return amount not used bytes)
  43. /// throw exception on bad http format (unsupported encoding, etc)
  44. /// sz == 0 signaling end of input stream
  45. bool Parse(const char* data, size_t sz) {
  46. if (ParseImpl(data, sz)) {
  47. if (DecodeContent_) {
  48. DecodeContent(DecodedContent_);
  49. }
  50. return true;
  51. }
  52. return false;
  53. }
  54. const char* Data() const noexcept {
  55. return Data_;
  56. }
  57. size_t GetExtraDataSize() const noexcept {
  58. return ExtraDataSize_;
  59. }
  60. const TString& FirstLine() const noexcept {
  61. return FirstLine_;
  62. }
  63. unsigned RetCode() const noexcept {
  64. return RetCode_;
  65. }
  66. const THttpVersion& HttpVersion() const noexcept {
  67. return HttpVersion_;
  68. }
  69. const THttpHeaders& Headers() const noexcept {
  70. return Headers_;
  71. }
  72. bool IsKeepAlive() const noexcept {
  73. return KeepAlive_;
  74. }
  75. bool GetContentLength(ui64& value) const noexcept {
  76. if (!HasContentLength_) {
  77. return false;
  78. }
  79. value = ContentLength_;
  80. return true;
  81. }
  82. TString GetBestCompressionScheme() const;
  83. const TString& Content() const noexcept {
  84. return Content_;
  85. }
  86. const TString& DecodedContent() const noexcept {
  87. return DecodedContent_;
  88. }
  89. void Prepare() {
  90. HeaderLine_.reserve(128);
  91. FirstLine_.reserve(128);
  92. }
  93. bool DecodeContent(TString& decodedContent) const;
  94. private:
  95. bool ParseImpl(const char* data, size_t sz) {
  96. Data_ = data;
  97. DataEnd_ = data + sz;
  98. if (sz == 0) {
  99. OnEof();
  100. return true;
  101. }
  102. return (this->*Parser_)();
  103. }
  104. // stage parsers
  105. bool FirstLineParser();
  106. bool HeadersParser();
  107. bool ContentParser();
  108. bool ChunkedContentParser();
  109. bool OnEndParsing();
  110. // continue read to CurrentLine_
  111. bool ReadLine();
  112. void ParseHttpVersion(TStringBuf httpVersion);
  113. void ParseHeaderLine();
  114. void OnEof();
  115. void ApplyHeaderLine(const TStringBuf& name, const TStringBuf& val);
  116. typedef bool (THttpParser::*TParser)();
  117. TParser Parser_; //current parser (stage)
  118. TMessageType MessageType_ = Response;
  119. bool CollectHeaders_ = true;
  120. bool GzipAllowMultipleStreams_ = true;
  121. bool DecodeContent_ = true;
  122. bool BodyNotExpected_ = false;
  123. // parsed data
  124. const char* Data_ = nullptr;
  125. const char* DataEnd_ = nullptr;
  126. TString CurrentLine_;
  127. TString HeaderLine_;
  128. size_t ExtraDataSize_ = 0;
  129. // headers
  130. TString FirstLine_;
  131. THttpVersion HttpVersion_;
  132. unsigned RetCode_ = 0;
  133. THttpHeaders Headers_;
  134. bool KeepAlive_ = false;
  135. THashSet<TString> AcceptEncodings_;
  136. TString ContentEncoding_;
  137. bool HasContentLength_ = false;
  138. ui64 ContentLength_ = 0;
  139. struct TChunkInputState {
  140. size_t LeftBytes_ = 0;
  141. bool ReadLastChunk_ = false;
  142. };
  143. TAutoPtr<TChunkInputState> ChunkInputState_;
  144. TString Content_;
  145. TString DecodedContent_;
  146. };