httpfetcher.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. #pragma once
  2. #ifdef _MSC_VER
  3. #include <io.h>
  4. #endif
  5. #include <library/cpp/http/misc/httpdate.h>
  6. #include "httpagent.h"
  7. #include "httpparser.h"
  8. struct TFakeBackup {
  9. int Write(void* /*buf*/, size_t /*size*/) {
  10. return 0;
  11. }
  12. };
  13. template <size_t bufsize = 5000>
  14. struct TFakeAlloc {
  15. void Shrink(void* /*buf*/, size_t /*size*/) {
  16. }
  17. void* Grab(size_t /*min*/, size_t* real) {
  18. *real = bufsize;
  19. return buf;
  20. }
  21. char buf[bufsize];
  22. };
  23. template <typename TAlloc = TFakeAlloc<>,
  24. typename TCheck = TFakeCheck<>,
  25. typename TWriter = TFakeBackup,
  26. typename TAgent = THttpAgent<>>
  27. class THttpFetcher: public THttpParser<TCheck>, public TAlloc, public TWriter, public TAgent {
  28. public:
  29. static const size_t TCP_MIN = 1500;
  30. static int TerminateNow;
  31. THttpFetcher()
  32. : THttpParser<TCheck>()
  33. , TAlloc()
  34. , TWriter()
  35. , TAgent()
  36. {
  37. }
  38. virtual ~THttpFetcher() {
  39. }
  40. int Fetch(THttpHeader* header, const char* path, const char* const* headers, int persistent, bool head_request = false) {
  41. int ret = 0;
  42. int fetcherr = 0;
  43. THttpParser<TCheck>::Init(header, head_request);
  44. const char* scheme = HttpUrlSchemeKindToString((THttpURL::TSchemeKind)TAgent::GetScheme());
  45. size_t schemelen = strlen(scheme);
  46. if (*path == '/') {
  47. header->base = TStringBuf(scheme, schemelen);
  48. header->base += TStringBuf("://", 3);
  49. header->base += TStringBuf(TAgent::pHostBeg, TAgent::pHostEnd - TAgent::pHostBeg);
  50. header->base += path;
  51. } else {
  52. if (strlen(path) >= FETCHER_URL_MAX) {
  53. header->error = HTTP_URL_TOO_LARGE;
  54. return 0;
  55. }
  56. header->base = path;
  57. }
  58. if ((ret = TAgent::RequestGet(path, headers, persistent, head_request))) {
  59. header->error = (i16)ret;
  60. return 0;
  61. }
  62. bool inheader = 1;
  63. void *bufptr = nullptr, *buf = nullptr, *parsebuf = nullptr;
  64. ssize_t got;
  65. size_t buffree = 0, bufsize = 0, buflen = 0;
  66. size_t maxsize = TCheck::GetMaxHeaderSize();
  67. do {
  68. if (buffree < TCP_MIN) {
  69. if (buf) {
  70. TAlloc::Shrink(buf, buflen - buffree);
  71. if (TWriter::Write(buf, buflen - buffree) < 0) {
  72. buf = nullptr;
  73. ret = EIO;
  74. break;
  75. }
  76. }
  77. if (!(buf = TAlloc::Grab(TCP_MIN, &buflen))) {
  78. ret = ENOMEM;
  79. break;
  80. }
  81. bufptr = buf;
  82. buffree = buflen;
  83. }
  84. if ((got = TAgent::read(bufptr, buffree)) < 0) {
  85. fetcherr = errno;
  86. if (errno == EINTR)
  87. header->error = HTTP_INTERRUPTED;
  88. else if (errno == ETIMEDOUT)
  89. header->error = HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING;
  90. else
  91. header->error = HTTP_CONNECTION_LOST;
  92. break;
  93. }
  94. parsebuf = bufptr;
  95. bufptr = (char*)bufptr + got;
  96. bufsize += got;
  97. buffree -= got;
  98. THttpParser<TCheck>::Parse(parsebuf, got);
  99. if (header->error)
  100. break; //if ANY error ocurred we will stop download that file or will have unprognosed stream position until MAX size reached
  101. if (inheader && THttpParser<TCheck>::GetState() != THttpParser<TCheck>::hp_in_header) {
  102. inheader = 0;
  103. if (TCheck::Check(header))
  104. break;
  105. if (header->header_size > (long)maxsize) {
  106. header->error = HTTP_HEADER_TOO_LARGE;
  107. break;
  108. }
  109. }
  110. if (!inheader) {
  111. maxsize = TCheck::GetMaxBodySize(header);
  112. }
  113. if (header->http_status >= HTTP_EXTENDED)
  114. break;
  115. if (bufsize > maxsize) {
  116. header->error = inheader ? HTTP_HEADER_TOO_LARGE : HTTP_BODY_TOO_LARGE;
  117. break;
  118. }
  119. if (TerminateNow) {
  120. header->error = HTTP_INTERRUPTED;
  121. break;
  122. }
  123. } while (THttpParser<TCheck>::GetState() > THttpParser<TCheck>::hp_eof);
  124. i64 Adjustment = 0;
  125. if (!header->error) {
  126. if (header->transfer_chunked) {
  127. Adjustment = header->header_size + header->entity_size - bufsize - 1;
  128. } else if (header->content_length >= 0) {
  129. Adjustment = header->header_size + header->content_length - bufsize;
  130. }
  131. if (Adjustment > 0)
  132. Adjustment = 0;
  133. }
  134. if (buf) {
  135. TAlloc::Shrink(buf, buflen - buffree + Adjustment);
  136. if (TWriter::Write(buf, buflen - buffree) < 0)
  137. ret = EIO;
  138. }
  139. TCheck::CheckEndDoc(header);
  140. if (ret || header->error || header->http_status >= HTTP_EXTENDED || header->connection_closed) {
  141. TAgent::Disconnect();
  142. if (!fetcherr)
  143. fetcherr = errno;
  144. }
  145. errno = fetcherr;
  146. return ret;
  147. }
  148. };
  149. template <typename TAlloc, typename TCheck, typename TWriter, typename TAgent>
  150. int THttpFetcher<TAlloc, TCheck, TWriter, TAgent>::TerminateNow = 0;