// httpreqdata.cpp (6.6 KB)

  1. #include "httpreqdata.h"
  2. #include <library/cpp/case_insensitive_string/case_insensitive_string.h>
  3. #include <util/stream/mem.h>
  4. #include <util/string/join.h>
  5. #include <array>
  6. #ifdef _sse4_2_
  7. #include <smmintrin.h>
  8. #endif
  9. TBaseServerRequestData::TBaseServerRequestData(SOCKET s)
  10. : Socket_(s)
  11. , BeginTime_(MicroSeconds())
  12. {
  13. }
  14. TBaseServerRequestData::TBaseServerRequestData(TStringBuf qs, SOCKET s)
  15. : Query_(qs)
  16. , OrigQuery_(Query_)
  17. , Socket_(s)
  18. , BeginTime_(MicroSeconds())
  19. {
  20. }
  21. void TBaseServerRequestData::AppendQueryString(TStringBuf str) {
  22. if (Y_UNLIKELY(!Query_.empty())) {
  23. TStringBuf separator = !Query_.EndsWith('&') && !str.StartsWith('&') ? "&"sv : ""sv;
  24. ModifiedQueryString_ = TString::Join(Query_, separator, str);
  25. } else {
  26. ModifiedQueryString_ = str;
  27. }
  28. Query_ = ModifiedQueryString_;
  29. }
  30. void TBaseServerRequestData::SetRemoteAddr(TStringBuf addr) {
  31. Addr_.ConstructInPlace(addr.substr(0, INET6_ADDRSTRLEN - 1));
  32. }
  33. TStringBuf TBaseServerRequestData::RemoteAddr() const {
  34. if (!Addr_) {
  35. auto& addr = Addr_.ConstructInPlace();
  36. addr.ReserveAndResize(INET6_ADDRSTRLEN);
  37. if (GetRemoteAddr(Socket_, addr.begin(), addr.size())) {
  38. if (auto pos = addr.find('\0'); pos != TString::npos) {
  39. addr.resize(pos);
  40. }
  41. } else {
  42. addr.clear();
  43. }
  44. }
  45. return *Addr_;
  46. }
  47. const TString* TBaseServerRequestData::HeaderIn(TStringBuf key) const {
  48. return HeadersIn_.FindPtr(key);
  49. }
  50. TStringBuf TBaseServerRequestData::HeaderInOrEmpty(TStringBuf key) const {
  51. const auto* ptr = HeaderIn(key);
  52. return ptr ? TStringBuf{*ptr} : TStringBuf{};
  53. }
  54. TString TBaseServerRequestData::HeaderByIndex(size_t n) const noexcept {
  55. if (n >= HeadersIn_.size()) {
  56. return {};
  57. }
  58. const auto& [key, value] = *std::next(HeadersIn_.begin(), n);
  59. return TString::Join(key, ": ", value);
  60. }
  61. TStringBuf TBaseServerRequestData::Environment(TStringBuf key) const {
  62. TCaseInsensitiveStringBuf ciKey(key.data(), key.size());
  63. if (ciKey == "REMOTE_ADDR") {
  64. const auto ip = HeaderIn("X-Real-IP");
  65. return ip ? *ip : RemoteAddr();
  66. } else if (ciKey == "QUERY_STRING") {
  67. return Query();
  68. } else if (ciKey == "SERVER_NAME") {
  69. return ServerName();
  70. } else if (ciKey == "SERVER_PORT") {
  71. return ServerPort();
  72. } else if (ciKey == "SCRIPT_NAME") {
  73. return ScriptName();
  74. }
  75. return {};
  76. }
  77. void TBaseServerRequestData::Clear() {
  78. HeadersIn_.clear();
  79. Addr_ = Nothing();
  80. Path_.clear();
  81. Query_ = {};
  82. OrigQuery_ = {};
  83. Host_.clear();
  84. Port_.clear();
  85. CurPage_.remove();
  86. ParseBuf_.clear();
  87. BeginTime_ = MicroSeconds();
  88. }
  89. const TString& TBaseServerRequestData::GetCurPage() const {
  90. if (!CurPage_ && Host_) {
  91. std::array<TStringBuf, 7> fragments;
  92. auto fragmentIt = fragments.begin();
  93. *fragmentIt++ = "http://"sv;
  94. *fragmentIt++ = Host_;
  95. if (Port_) {
  96. *fragmentIt++ = ":"sv;
  97. *fragmentIt++ = Port_;
  98. }
  99. *fragmentIt++ = Path_;
  100. if (!Query_.empty()) {
  101. *fragmentIt++ = "?"sv;
  102. *fragmentIt++ = Query_;
  103. }
  104. CurPage_ = JoinRange(""sv, fragments.begin(), fragmentIt);
  105. }
  106. return CurPage_;
  107. }
  108. bool TBaseServerRequestData::Parse(TStringBuf origReq) {
  109. ParseBuf_.reserve(origReq.size() + 16);
  110. ParseBuf_.assign(origReq.begin(), origReq.end());
  111. ParseBuf_.insert(ParseBuf_.end(), 15, ' ');
  112. ParseBuf_.push_back('\0');
  113. char* req = ParseBuf_.data();
  114. while (*req == ' ' || *req == '\t')
  115. req++;
  116. if (*req != '/')
  117. return false; // we are not a proxy
  118. while (req[1] == '/') // remove redundant slashes
  119. req++;
  120. char* pathBegin = req;
  121. char* queryBegin = nullptr;
  122. #ifdef _sse4_2_
  123. const __m128i simdSpace = _mm_set1_epi8(' ');
  124. const __m128i simdTab = _mm_set1_epi8('\t');
  125. const __m128i simdHash = _mm_set1_epi8('#');
  126. const __m128i simdQuestion = _mm_set1_epi8('?');
  127. auto isEnd = [=](__m128i x) {
  128. const auto v = _mm_or_si128(
  129. _mm_or_si128(
  130. _mm_cmpeq_epi8(x, simdSpace), _mm_cmpeq_epi8(x, simdTab)),
  131. _mm_cmpeq_epi8(x, simdHash));
  132. return !_mm_testz_si128(v, v);
  133. };
  134. // No need for the range check because we have padding of spaces at the end.
  135. for (;; req += 16) {
  136. const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req));
  137. const auto isQuestionSimd = _mm_cmpeq_epi8(x, simdQuestion);
  138. const auto isQuestion = !_mm_testz_si128(isQuestionSimd, isQuestionSimd);
  139. if (isEnd(x)) {
  140. if (isQuestion) {
  141. // The prospective query end and a question sign are both in the
  142. // current block. Need to find out which comes first.
  143. for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) {
  144. if (*req == '?') {
  145. queryBegin = req + 1;
  146. break;
  147. }
  148. }
  149. }
  150. break;
  151. }
  152. if (isQuestion) {
  153. // Find the exact query beginning
  154. for (queryBegin = req; *queryBegin != '?'; ++queryBegin) {
  155. }
  156. ++queryBegin;
  157. break;
  158. }
  159. }
  160. // If we bailed out because we found query string begin. Now look for the the end of the query
  161. if (queryBegin) {
  162. for (;; req += 16) {
  163. const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req));
  164. if (isEnd(x)) {
  165. break;
  166. }
  167. }
  168. }
  169. #else
  170. for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) {
  171. if (*req == '?') {
  172. queryBegin = req + 1;
  173. break;
  174. }
  175. }
  176. #endif
  177. while (*req != ' ' && *req != '\t' && *req != '#') {
  178. ++req;
  179. }
  180. char* pathEnd = queryBegin ? queryBegin - 1 : req;
  181. // Make sure Path_ and Query_ are actually zero-reminated.
  182. *pathEnd = '\0';
  183. *req = '\0';
  184. Path_ = TStringBuf{pathBegin, pathEnd};
  185. if (queryBegin) {
  186. Query_ = TStringBuf{queryBegin, req};
  187. OrigQuery_ = Query_;
  188. } else {
  189. Query_ = {};
  190. OrigQuery_ = {};
  191. }
  192. return true;
  193. }
  194. void TBaseServerRequestData::AddHeader(const TString& name, const TString& value) {
  195. HeadersIn_[name] = value;
  196. if (stricmp(name.data(), "Host") == 0) {
  197. size_t hostLen = strcspn(value.data(), ":");
  198. if (value[hostLen] == ':')
  199. Port_ = value.substr(hostLen + 1);
  200. Host_ = value.substr(0, hostLen);
  201. }
  202. }
  203. void TBaseServerRequestData::SetPath(TString path) {
  204. Path_ = std::move(path);
  205. }