http_parser.h 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. #pragma once
  2. #include <util/generic/string.h>
  3. #include <util/generic/strbuf.h>
  4. #include <util/generic/yexception.h>
  5. #include <util/generic/hash_set.h>
  6. #include <util/string/cast.h>
  7. #include <library/cpp/http/io/stream.h>
  /// HTTP protocol version stored as a major/minor pair.
  /// A default-constructed value is 1.0.
  struct THttpVersion {
      unsigned Major{1};
      unsigned Minor{0};
  };
  // HTTP message parser for async/callback architectures (an ugly state machine).
  // Usage: call Parse(...); when it returns 'true', the whole message has been parsed.
  // Input bytes that are not part of the message are counted by GetExtraDataSize().
  15. class THttpParser {
  16. public:
  17. enum TMessageType {
  18. Request,
  19. Response
  20. };
  21. THttpParser(TMessageType mt = Response)
  22. : Parser_(&THttpParser::FirstLineParser)
  23. , MessageType_(mt)
  24. {
  25. }
  26. inline void DisableCollectingHeaders() noexcept {
  27. CollectHeaders_ = false;
  28. }
  29. inline void SetGzipAllowMultipleStreams(bool allow) noexcept {
  30. GzipAllowMultipleStreams_ = allow;
  31. }
  32. inline void DisableDecodeContent() noexcept {
  33. DecodeContent_ = false;
  34. }
  35. /*
  36. * Disable message-body parsing.
  37. * Useful for parse HEAD method responses
  38. */
  39. inline void BodyNotExpected() {
  40. BodyNotExpected_ = true;
  41. }
  42. /// @return true on end parsing (GetExtraDataSize() return amount not used bytes)
  43. /// throw exception on bad http format (unsupported encoding, etc)
  44. /// sz == 0 signaling end of input stream
  45. bool Parse(const char* data, size_t sz) {
  46. if (ParseImpl(data, sz)) {
  47. DecodeContent();
  48. return true;
  49. }
  50. return false;
  51. }
  52. const char* Data() const noexcept {
  53. return Data_;
  54. }
  55. size_t GetExtraDataSize() const noexcept {
  56. return ExtraDataSize_;
  57. }
  58. const TString& FirstLine() const noexcept {
  59. return FirstLine_;
  60. }
  61. unsigned RetCode() const noexcept {
  62. return RetCode_;
  63. }
  64. const THttpVersion& HttpVersion() const noexcept {
  65. return HttpVersion_;
  66. }
  67. const THttpHeaders& Headers() const noexcept {
  68. return Headers_;
  69. }
  70. bool IsKeepAlive() const noexcept {
  71. return KeepAlive_;
  72. }
  73. bool GetContentLength(ui64& value) const noexcept {
  74. if (!HasContentLength_) {
  75. return false;
  76. }
  77. value = ContentLength_;
  78. return true;
  79. }
  80. TString GetBestCompressionScheme() const;
  81. const TString& Content() const noexcept {
  82. return Content_;
  83. }
  84. const TString& DecodedContent() const noexcept {
  85. return DecodedContent_;
  86. }
  87. void Prepare() {
  88. HeaderLine_.reserve(128);
  89. FirstLine_.reserve(128);
  90. }
  91. private:
  92. bool ParseImpl(const char* data, size_t sz) {
  93. Data_ = data;
  94. DataEnd_ = data + sz;
  95. if (sz == 0) {
  96. OnEof();
  97. return true;
  98. }
  99. return (this->*Parser_)();
  100. }
  101. // stage parsers
  102. bool FirstLineParser();
  103. bool HeadersParser();
  104. bool ContentParser();
  105. bool ChunkedContentParser();
  106. bool OnEndParsing();
  107. // continue read to CurrentLine_
  108. bool ReadLine();
  109. void ParseHttpVersion(TStringBuf httpVersion);
  110. void ParseHeaderLine();
  111. void OnEof();
  112. bool DecodeContent();
  113. void ApplyHeaderLine(const TStringBuf& name, const TStringBuf& val);
  114. typedef bool (THttpParser::*TParser)();
  115. TParser Parser_; //current parser (stage)
  116. TMessageType MessageType_ = Response;
  117. bool CollectHeaders_ = true;
  118. bool GzipAllowMultipleStreams_ = true;
  119. bool DecodeContent_ = true;
  120. bool BodyNotExpected_ = false;
  121. // parsed data
  122. const char* Data_ = nullptr;
  123. const char* DataEnd_ = nullptr;
  124. TString CurrentLine_;
  125. TString HeaderLine_;
  126. size_t ExtraDataSize_ = 0;
  127. // headers
  128. TString FirstLine_;
  129. THttpVersion HttpVersion_;
  130. unsigned RetCode_ = 0;
  131. THttpHeaders Headers_;
  132. bool KeepAlive_ = false;
  133. THashSet<TString> AcceptEncodings_;
  134. TString ContentEncoding_;
  135. bool HasContentLength_ = false;
  136. ui64 ContentLength_ = 0;
  137. struct TChunkInputState {
  138. size_t LeftBytes_ = 0;
  139. bool ReadLastChunk_ = false;
  140. };
  141. TAutoPtr<TChunkInputState> ChunkInputState_;
  142. TString Content_;
  143. TString DecodedContent_;
  144. };