123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- #include "httpreqdata.h"
- #include <library/cpp/case_insensitive_string/case_insensitive_string.h>
- #include <util/stream/mem.h>
- #include <util/string/join.h>
- #include <array>
- #ifdef _sse4_2_
- #include <smmintrin.h>
- #endif
- TBaseServerRequestData::TBaseServerRequestData(SOCKET s)
- : Socket_(s)
- , BeginTime_(MicroSeconds())
- {
- }
- TBaseServerRequestData::TBaseServerRequestData(TStringBuf qs, SOCKET s)
- : Query_(qs)
- , OrigQuery_(Query_)
- , Socket_(s)
- , BeginTime_(MicroSeconds())
- {
- }
- void TBaseServerRequestData::AppendQueryString(TStringBuf str) {
- if (Y_UNLIKELY(!Query_.empty())) {
- TStringBuf separator = !Query_.EndsWith('&') && !str.StartsWith('&') ? "&"sv : ""sv;
- ModifiedQueryString_ = TString::Join(Query_, separator, str);
- } else {
- ModifiedQueryString_ = str;
- }
- Query_ = ModifiedQueryString_;
- }
- void TBaseServerRequestData::SetRemoteAddr(TStringBuf addr) {
- Addr_.ConstructInPlace(addr.substr(0, INET6_ADDRSTRLEN - 1));
- }
- TStringBuf TBaseServerRequestData::RemoteAddr() const {
- if (!Addr_) {
- auto& addr = Addr_.ConstructInPlace();
- addr.ReserveAndResize(INET6_ADDRSTRLEN);
- if (GetRemoteAddr(Socket_, addr.begin(), addr.size())) {
- if (auto pos = addr.find('\0'); pos != TString::npos) {
- addr.resize(pos);
- }
- } else {
- addr.clear();
- }
- }
- return *Addr_;
- }
- const TString* TBaseServerRequestData::HeaderIn(TStringBuf key) const {
- return HeadersIn_.FindPtr(key);
- }
- TStringBuf TBaseServerRequestData::HeaderInOrEmpty(TStringBuf key) const {
- const auto* ptr = HeaderIn(key);
- return ptr ? TStringBuf{*ptr} : TStringBuf{};
- }
- TString TBaseServerRequestData::HeaderByIndex(size_t n) const noexcept {
- if (n >= HeadersIn_.size()) {
- return {};
- }
- const auto& [key, value] = *std::next(HeadersIn_.begin(), n);
- return TString::Join(key, ": ", value);
- }
- TStringBuf TBaseServerRequestData::Environment(TStringBuf key) const {
- TCaseInsensitiveStringBuf ciKey(key.data(), key.size());
- if (ciKey == "REMOTE_ADDR") {
- const auto ip = HeaderIn("X-Real-IP");
- return ip ? *ip : RemoteAddr();
- } else if (ciKey == "QUERY_STRING") {
- return Query();
- } else if (ciKey == "SERVER_NAME") {
- return ServerName();
- } else if (ciKey == "SERVER_PORT") {
- return ServerPort();
- } else if (ciKey == "SCRIPT_NAME") {
- return ScriptName();
- }
- return {};
- }
- void TBaseServerRequestData::Clear() {
- HeadersIn_.clear();
- Addr_ = Nothing();
- Path_.clear();
- Query_ = {};
- OrigQuery_ = {};
- Host_.clear();
- Port_.clear();
- CurPage_.remove();
- ParseBuf_.clear();
- BeginTime_ = MicroSeconds();
- }
- const TString& TBaseServerRequestData::GetCurPage() const {
- if (!CurPage_ && Host_) {
- std::array<TStringBuf, 7> fragments;
- auto fragmentIt = fragments.begin();
- *fragmentIt++ = "http://"sv;
- *fragmentIt++ = Host_;
- if (Port_) {
- *fragmentIt++ = ":"sv;
- *fragmentIt++ = Port_;
- }
- *fragmentIt++ = Path_;
- if (!Query_.empty()) {
- *fragmentIt++ = "?"sv;
- *fragmentIt++ = Query_;
- }
- CurPage_ = JoinRange(""sv, fragments.begin(), fragmentIt);
- }
- return CurPage_;
- }
- bool TBaseServerRequestData::Parse(TStringBuf origReq) {
- ParseBuf_.reserve(origReq.size() + 16);
- ParseBuf_.assign(origReq.begin(), origReq.end());
- ParseBuf_.insert(ParseBuf_.end(), 15, ' ');
- ParseBuf_.push_back('\0');
- char* req = ParseBuf_.data();
- while (*req == ' ' || *req == '\t')
- req++;
- if (*req != '/')
- return false; // we are not a proxy
- while (req[1] == '/') // remove redundant slashes
- req++;
- char* pathBegin = req;
- char* queryBegin = nullptr;
- #ifdef _sse4_2_
- const __m128i simdSpace = _mm_set1_epi8(' ');
- const __m128i simdTab = _mm_set1_epi8('\t');
- const __m128i simdHash = _mm_set1_epi8('#');
- const __m128i simdQuestion = _mm_set1_epi8('?');
- auto isEnd = [=](__m128i x) {
- const auto v = _mm_or_si128(
- _mm_or_si128(
- _mm_cmpeq_epi8(x, simdSpace), _mm_cmpeq_epi8(x, simdTab)),
- _mm_cmpeq_epi8(x, simdHash));
- return !_mm_testz_si128(v, v);
- };
- // No need for the range check because we have padding of spaces at the end.
- for (;; req += 16) {
- const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req));
- const auto isQuestionSimd = _mm_cmpeq_epi8(x, simdQuestion);
- const auto isQuestion = !_mm_testz_si128(isQuestionSimd, isQuestionSimd);
- if (isEnd(x)) {
- if (isQuestion) {
- // The prospective query end and a question sign are both in the
- // current block. Need to find out which comes first.
- for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) {
- if (*req == '?') {
- queryBegin = req + 1;
- break;
- }
- }
- }
- break;
- }
- if (isQuestion) {
- // Find the exact query beginning
- for (queryBegin = req; *queryBegin != '?'; ++queryBegin) {
- }
- ++queryBegin;
- break;
- }
- }
- // If we bailed out because we found query string begin. Now look for the the end of the query
- if (queryBegin) {
- for (;; req += 16) {
- const auto x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(req));
- if (isEnd(x)) {
- break;
- }
- }
- }
- #else
- for (;*req != ' ' && *req != '\t' && *req != '#'; ++req) {
- if (*req == '?') {
- queryBegin = req + 1;
- break;
- }
- }
- #endif
- while (*req != ' ' && *req != '\t' && *req != '#') {
- ++req;
- }
- char* pathEnd = queryBegin ? queryBegin - 1 : req;
- // Make sure Path_ and Query_ are actually zero-reminated.
- *pathEnd = '\0';
- *req = '\0';
- Path_ = TStringBuf{pathBegin, pathEnd};
- if (queryBegin) {
- Query_ = TStringBuf{queryBegin, req};
- OrigQuery_ = Query_;
- } else {
- Query_ = {};
- OrigQuery_ = {};
- }
- return true;
- }
- void TBaseServerRequestData::AddHeader(const TString& name, const TString& value) {
- HeadersIn_[name] = value;
- if (stricmp(name.data(), "Host") == 0) {
- size_t hostLen = strcspn(value.data(), ":");
- if (value[hostLen] == ':')
- Port_ = value.substr(hostLen + 1);
- Host_ = value.substr(0, hostLen);
- }
- }
- void TBaseServerRequestData::SetPath(TString path) {
- Path_ = std::move(path);
- }
|