httpload.h 7.5 KB


  1. #pragma once
  2. #include "httpagent.h"
  3. #include "httpparser.h"
  4. #include "http_digest.h"
  5. #include <util/system/compat.h>
  6. #include <util/string/vector.h>
  7. #include <util/network/ip.h>
  8. #include <library/cpp/uri/http_url.h>
  9. #include <library/cpp/http/misc/httpcodes.h>
  10. /********************************************************/
  11. // Section 1: socket handlers
  12. /********************************************************/
// The following classes adapt the template-based
// THttpAgent scheme to work with sockets through a
// flexible, object-oriented interface.
  16. /********************************************************/
  17. // This class is used as a base one for flexible
  18. // socket handling
  19. class socketAbstractHandler {
  20. public:
  21. virtual bool Good() = 0;
  22. virtual int Connect(const TAddrList& addrs, TDuration Timeout) = 0;
  23. virtual void Disconnect() = 0;
  24. virtual void shutdown() = 0;
  25. virtual bool send(const char* message, ssize_t messlen) = 0;
  26. virtual bool peek() = 0;
  27. virtual ssize_t read(void* buffer, ssize_t buflen) = 0;
  28. virtual ~socketAbstractHandler() {
  29. }
  30. protected:
  31. socketAbstractHandler() {
  32. }
  33. };
  34. /********************************************************/
  35. // This class is used as a proxy between THttpAgent and
  36. // socketAbstractHandler
  37. // (it is used by template scheme,
  38. // so it does not have virtual methods)
  39. class TSocketHandlerPtr {
  40. protected:
  41. socketAbstractHandler* Handler_;
  42. public:
  43. TSocketHandlerPtr()
  44. : Handler_(nullptr)
  45. {
  46. }
  47. virtual ~TSocketHandlerPtr() {
  48. delete Handler_;
  49. }
  50. int Good() {
  51. return (Handler_ && Handler_->Good());
  52. }
  53. int Connect(const TAddrList& addrs, TDuration Timeout) {
  54. return (Handler_) ? Handler_->Connect(addrs, Timeout) : 1;
  55. }
  56. void Disconnect() {
  57. if (Handler_)
  58. Handler_->Disconnect();
  59. }
  60. void shutdown() {
  61. if (Handler_)
  62. Handler_->shutdown();
  63. }
  64. bool send(const char* message, ssize_t messlen) {
  65. return (Handler_) ? Handler_->send(message, messlen) : false;
  66. }
  67. virtual bool peek() {
  68. return (Handler_) ? Handler_->peek() : false;
  69. }
  70. virtual ssize_t read(void* buffer, ssize_t buflen) {
  71. return (Handler_) ? Handler_->read(buffer, buflen) : 0;
  72. }
  73. void setHandler(socketAbstractHandler* handler) {
  74. if (Handler_)
  75. delete Handler_;
  76. Handler_ = handler;
  77. }
  78. };
  79. /********************************************************/
  80. // Here is httpAgent that uses socketAbstractHandler class
  81. // ant its derivatives
  82. using httpSpecialAgent = THttpAgent<TSocketHandlerPtr>;
  83. /********************************************************/
  84. // Regular handler is used as implementation of
  85. // socketAbstractHandler for work through HTTP protocol
  86. class socketRegularHandler: public socketAbstractHandler {
  87. protected:
  88. TSimpleSocketHandler Socket_;
  89. public:
  90. socketRegularHandler()
  91. : Socket_()
  92. {
  93. }
  94. bool Good() override {
  95. return Socket_.Good();
  96. }
  97. int Connect(const TAddrList& addrs, TDuration Timeout) override {
  98. return Socket_.Connect(addrs, Timeout);
  99. }
  100. void Disconnect() override {
  101. Socket_.Disconnect();
  102. }
  103. void shutdown() override {
  104. //Do not block writing to socket
  105. //There are servers that works in a bad way with this
  106. //mSocket.shutdown();
  107. }
  108. bool send(const char* message, ssize_t messlen) override {
  109. return Socket_.send(message, messlen);
  110. }
  111. bool peek() override {
  112. return Socket_.peek();
  113. }
  114. ssize_t read(void* buffer, ssize_t buflen) override {
  115. return Socket_.read(buffer, buflen);
  116. }
  117. };
  118. /********************************************************/
  119. // The base factory that allows to choose an appropriate
  120. // socketAbstractHandler implementation by url schema
  121. class socketHandlerFactory {
  122. public:
  123. virtual ~socketHandlerFactory() {
  124. }
  125. //returns mHandler_HTTP for correct HTTP-based url
  126. virtual socketAbstractHandler* chooseHandler(const THttpURL& url);
  127. static socketHandlerFactory sInstance;
  128. };
  129. /********************************************************/
// Section 2: a configurable tool for parsing an HTTP response
  131. /********************************************************/
  132. class httpAgentReader: public THttpParserGeneric<1> {
  133. protected:
  134. THttpAuthHeader Header_;
  135. httpSpecialAgent& Agent_;
  136. char* Buffer_;
  137. void* BufPtr_;
  138. int BufSize_;
  139. long BufRest_;
  140. void readBuf();
  141. bool step() {
  142. if (BufRest_ == 0)
  143. readBuf();
  144. if (eof())
  145. return false;
  146. return true;
  147. }
  148. public:
  149. httpAgentReader(httpSpecialAgent& agent,
  150. const char* baseUrl,
  151. bool assumeConnectionClosed,
  152. bool use_auth = false,
  153. int bufSize = 0x1000);
  154. ~httpAgentReader();
  155. bool eof() {
  156. return BufRest_ < 0;
  157. }
  158. int error() {
  159. return Header_.error;
  160. }
  161. void setError(int errCode) {
  162. Header_.error = errCode;
  163. }
  164. const THttpAuthHeader* getAuthHeader() {
  165. return &Header_;
  166. }
  167. const THttpHeader* readHeader();
  168. long readPortion(void*& buf);
  169. bool skipTheRest();
  170. };
  171. /********************************************************/
  172. // Section 3: the main class
  173. /********************************************************/
  174. class httpLoadAgent: public httpSpecialAgent {
  175. protected:
  176. socketHandlerFactory& Factory_;
  177. bool HandleAuthorization_;
  178. THttpURL URL_;
  179. bool PersistentConn_;
  180. httpAgentReader* Reader_;
  181. TVector<TString> Headers_;
  182. int ErrCode_;
  183. char* RealHost_;
  184. httpDigestHandler Digest_;
  185. void clearReader();
  186. bool doSetHost(const TAddrList& addrs);
  187. bool doStartRequest();
  188. public:
  189. httpLoadAgent(bool handleAuthorization = false,
  190. socketHandlerFactory& factory = socketHandlerFactory::sInstance);
  191. ~httpLoadAgent();
  192. void setRealHost(const char* host);
  193. void setIMS(const char* ifModifiedSince);
  194. void addHeaderInstruction(const char* instr);
  195. void dropHeaderInstructions();
  196. bool startRequest(const char* url,
  197. const char* url_to_merge = nullptr,
  198. bool persistent = false,
  199. const TAddrList& addrs = TAddrList());
  200. // deprecated v4-only
  201. bool startRequest(const char* url,
  202. const char* url_to_merge,
  203. bool persistent,
  204. ui32 ip);
  205. bool startRequest(const THttpURL& url,
  206. bool persistent = false,
  207. const TAddrList& addrs = TAddrList());
  208. bool setHost(const char* host_url,
  209. const TAddrList& addrs = TAddrList());
  210. bool startOneRequest(const char* local_url);
  211. const THttpAuthHeader* getAuthHeader() {
  212. if (Reader_ && Reader_->getAuthHeader()->use_auth)
  213. return Reader_->getAuthHeader();
  214. return nullptr;
  215. }
  216. const THttpHeader* getHeader() {
  217. if (Reader_)
  218. return Reader_->getAuthHeader();
  219. return nullptr;
  220. }
  221. const THttpURL& getURL() {
  222. return URL_;
  223. }
  224. bool eof() {
  225. if (Reader_)
  226. return Reader_->eof();
  227. return true;
  228. }
  229. int error() {
  230. if (ErrCode_)
  231. return ErrCode_;
  232. if (Reader_)
  233. return Reader_->error();
  234. return HTTP_BAD_URL;
  235. }
  236. long readPortion(void*& buf) {
  237. if (Reader_)
  238. return Reader_->readPortion(buf);
  239. return -1;
  240. }
  241. };
  242. /********************************************************/