// exthttpcodes.cpp (15 KB)
  1. #include "exthttpcodes.h"
  2. #include <cstring>
  3. const ui16 CrazyServer = ShouldDelete | MarkSuspect;
  4. struct http_flag {
  5. ui16 http;
  6. ui16 flag;
  7. };
  8. static http_flag HTTP_FLAG[] = {
  9. {HTTP_CONTINUE, MarkSuspect}, // 100
  10. {HTTP_SWITCHING_PROTOCOLS, CrazyServer}, // 101
  11. {HTTP_PROCESSING, CrazyServer}, // 102
  12. {HTTP_OK, ShouldReindex}, // 200
  13. {HTTP_CREATED, CrazyServer}, // 201
  14. {HTTP_ACCEPTED, ShouldDelete}, // 202
  15. {HTTP_NON_AUTHORITATIVE_INFORMATION, ShouldReindex}, // 203
  16. {HTTP_NO_CONTENT, ShouldDelete}, // 204
  17. {HTTP_RESET_CONTENT, ShouldDelete}, // 205
  18. {HTTP_PARTIAL_CONTENT, ShouldReindex}, // 206
  19. {HTTP_MULTI_STATUS, CrazyServer}, // 207
  20. {HTTP_ALREADY_REPORTED, CrazyServer}, // 208
  21. {HTTP_IM_USED, CrazyServer}, // 226
  22. {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete}, // 300
  23. {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir}, // 301
  24. {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir}, // 302
  25. {HTTP_SEE_OTHER, CheckLocation | ShouldDelete | MoveRedir}, // 303
  26. {HTTP_NOT_MODIFIED, 0}, // 304
  27. {HTTP_USE_PROXY, ShouldDelete}, // 305
  28. {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 307
  29. {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir}, // 308
  30. {HTTP_BAD_REQUEST, CrazyServer}, // 400
  31. {HTTP_UNAUTHORIZED, ShouldDelete}, // 401
  32. {HTTP_PAYMENT_REQUIRED, ShouldDelete}, // 402
  33. {HTTP_FORBIDDEN, ShouldDelete}, // 403
  34. {HTTP_NOT_FOUND, ShouldDelete}, // 404
  35. {HTTP_METHOD_NOT_ALLOWED, ShouldDelete}, // 405
  36. {HTTP_NOT_ACCEPTABLE, ShouldDelete}, // 406
  37. {HTTP_PROXY_AUTHENTICATION_REQUIRED, CrazyServer}, // 407
  38. {HTTP_REQUEST_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 408
  39. {HTTP_CONFLICT, MarkSuspect}, // 409
  40. {HTTP_GONE, ShouldDelete}, // 410
  41. {HTTP_LENGTH_REQUIRED, CrazyServer}, // 411
  42. {HTTP_PRECONDITION_FAILED, CrazyServer}, // 412
  43. {HTTP_REQUEST_ENTITY_TOO_LARGE, CrazyServer}, // 413
  44. {HTTP_REQUEST_URI_TOO_LARGE, ShouldDelete}, // 414
  45. {HTTP_UNSUPPORTED_MEDIA_TYPE, CrazyServer}, // 415
  46. {HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, CrazyServer}, // 416
  47. {HTTP_EXPECTATION_FAILED, ShouldDelete}, // 417
  48. {HTTP_I_AM_A_TEAPOT, CrazyServer}, // 418
  49. {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete}, // 419
  50. {HTTP_MISDIRECTED_REQUEST, CrazyServer}, // 421
  51. {HTTP_UNPROCESSABLE_ENTITY, CrazyServer}, // 422
  52. {HTTP_LOCKED, ShouldDelete}, // 423
  53. {HTTP_FAILED_DEPENDENCY, CrazyServer}, // 424
  54. {HTTP_UPGRADE_REQUIRED, ShouldDelete}, // 426
  55. {HTTP_PRECONDITION_REQUIRED, ShouldDelete}, // 428
  56. {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 429
  57. {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete}, // 451
  58. {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect}, // 500
  59. {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect}, // 501
  60. {HTTP_BAD_GATEWAY, MarkSuspect}, // 502
  61. {HTTP_SERVICE_UNAVAILABLE, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 503
  62. {HTTP_GATEWAY_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 504
  63. {HTTP_HTTP_VERSION_NOT_SUPPORTED, CrazyServer | ShouldDisconnect}, // 505
  64. {HTTP_VARIANT_ALSO_NEGOTIATES, CrazyServer | ShouldDisconnect}, // 506
  65. {HTTP_INSUFFICIENT_STORAGE, CrazyServer | ShouldDisconnect}, // 507
  66. {HTTP_LOOP_DETECTED, CrazyServer | ShouldDisconnect}, // 508
  67. {HTTP_BANDWIDTH_LIMIT_EXCEEDED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 509
  68. {HTTP_NOT_EXTENDED, ShouldDelete}, // 510
  69. {HTTP_NETWORK_AUTHENTICATION_REQUIRED, ShouldDelete}, // 511
  70. // custom
  71. {HTTP_BAD_RESPONSE_HEADER, CrazyServer}, // 1000
  72. {HTTP_CONNECTION_LOST, ShouldRetry}, // 1001
  73. {HTTP_BODY_TOO_LARGE, ShouldDelete | CanBeFake}, // 1002
  74. {HTTP_ROBOTS_TXT_DISALLOW, ShouldDelete}, // 1003
  75. {HTTP_BAD_URL, ShouldDelete}, // 1004
  76. {HTTP_BAD_MIME, ShouldDelete}, // 1005
  77. {HTTP_DNS_FAILURE, ShouldDisconnect | MarkSuspect}, // 1006
  78. {HTTP_BAD_STATUS_CODE, CrazyServer}, // 1007
  79. {HTTP_BAD_HEADER_STRING, CrazyServer}, // 1008
  80. {HTTP_BAD_CHUNK, CrazyServer}, // 1009
  81. {HTTP_CONNECT_FAILED, ShouldDisconnect | ShouldRetry | MarkSuspect}, // 1010
  82. {HTTP_FILTER_DISALLOW, ShouldDelete}, // 1011
  83. {HTTP_LOCAL_EIO, ShouldRetry}, // 1012
  84. {HTTP_BAD_CONTENT_LENGTH, ShouldDelete}, // 1013
  85. {HTTP_BAD_ENCODING, ShouldDelete}, // 1014
  86. {HTTP_LENGTH_UNKNOWN, ShouldDelete}, // 1015
  87. {HTTP_HEADER_EOF, ShouldRetry | CanBeFake}, // 1016
  88. {HTTP_MESSAGE_EOF, ShouldRetry | CanBeFake}, // 1017
  89. {HTTP_CHUNK_EOF, ShouldRetry | CanBeFake}, // 1018
  90. {HTTP_PAST_EOF, ShouldRetry | ShouldDelete | CanBeFake}, // 1019
  91. {HTTP_HEADER_TOO_LARGE, ShouldDelete}, // 1020
  92. {HTTP_URL_TOO_LARGE, ShouldDelete}, // 1021
  93. {HTTP_INTERRUPTED, 0}, // 1022
  94. {HTTP_CUSTOM_NOT_MODIFIED, 0}, // 1023
  95. {HTTP_BAD_CONTENT_ENCODING, ShouldDelete}, // 1024
  96. {HTTP_PROXY_UNKNOWN, 0}, // 1030
  97. {HTTP_PROXY_REQUEST_TIME_OUT, 0}, // 1031
  98. {HTTP_PROXY_INTERNAL_ERROR, 0}, // 1032
  99. {HTTP_PROXY_CONNECT_FAILED, 0}, // 1033
  100. {HTTP_PROXY_CONNECTION_LOST, 0}, // 1034
  101. {HTTP_PROXY_NO_PROXY, 0}, // 1035
  102. {HTTP_PROXY_ERROR, 0}, // 1036
  103. {HTTP_SSL_ERROR, 0}, // 1037
  104. {HTTP_CACHED_COPY_NOT_FOUND, 0}, // 1038
  105. {HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, ShouldRetry}, // 1039
  106. {HTTP_FETCHER_BAD_RESPONSE, 0}, // 1040
  107. {HTTP_FETCHER_MB_ERROR, 0}, // 1041
  108. {HTTP_SSL_CERT_ERROR, 0}, // 1042
  109. // Custom (replace HTTP 200/304)
  110. {EXT_HTTP_MIRRMOVE, 0}, // 2000
  111. {EXT_HTTP_MANUAL_DELETE, ShouldDelete}, // 2001
  112. {EXT_HTTP_NOTUSED2, ShouldDelete}, // 2002
  113. {EXT_HTTP_NOTUSED3, ShouldDelete}, // 2003
  114. {EXT_HTTP_REFRESH, ShouldDelete | CheckLinks | MoveRedir}, // 2004
  115. {EXT_HTTP_NOINDEX, ShouldDelete | CheckLinks}, // 2005
  116. {EXT_HTTP_BADCODES, ShouldDelete}, // 2006
  117. {EXT_HTTP_SITESTAT, ShouldDelete}, // 2007
  118. {EXT_HTTP_IOERROR, ShouldDelete}, // 2008
  119. {EXT_HTTP_BASEERROR, ShouldDelete}, // 2009
  120. {EXT_HTTP_PARSERROR, ShouldDelete | CanBeFake}, // 2010
  121. {EXT_HTTP_BAD_CHARSET, ShouldDelete | CheckLinks}, // 2011
  122. {EXT_HTTP_BAD_LANGUAGE, ShouldDelete | CheckLinks}, // 2012
  123. {EXT_HTTP_NUMERERROR, ShouldDelete}, // 2013
  124. {EXT_HTTP_EMPTYDOC, ShouldDelete | CheckLinks}, // 2014
  125. {EXT_HTTP_HUGEDOC, ShouldDelete}, // 2015
  126. {EXT_HTTP_LINKGARBAGE, ShouldDelete}, // 2016
  127. {EXT_HTTP_PARSERFAIL, ShouldDelete}, // 2019
  128. {EXT_HTTP_GZIPERROR, ShouldDelete}, // 2020
  129. {EXT_HTTP_MANUAL_DELETE_URL, ShouldDelete}, // 2022
  130. {EXT_HTTP_CUSTOM_PARTIAL_CONTENT, ShouldReindex}, // 2023
  131. {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete}, // 2024
  132. {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir}, // 2025
  133. {0, 0}};
  134. static ui16* prepare_flags(http_flag* arg) {
  135. static ui16 flags[EXT_HTTP_CODE_MAX];
  136. http_flag* ptr;
  137. size_t i;
  138. // устанавливаем значение по умолчанию для кодов не перечисленных в таблице выше
  139. for (i = 0; i < EXT_HTTP_CODE_MAX; ++i)
  140. flags[i] = CrazyServer;
  141. // устанавливаем флаги для перечисленных кодов
  142. for (ptr = arg; ptr->http; ++ptr)
  143. flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
  144. // для стандартных кодов ошибок берем флаги из первого кода каждой группы и проставляем их
  145. // всем кодам не перечисленным в таблице выше
  146. for (size_t group = 0; group < 1000; group += 100)
  147. for (size_t j = group + 1; j < group + 100; ++j)
  148. flags[j] = flags[group];
  149. // предыдущий цикл затер некоторые флаги перечисленные в таблице выше
  150. // восстанавливаем их
  151. for (ptr = arg; ptr->http; ++ptr)
  152. flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
  153. return flags;
  154. }
  155. ui16* http2status = prepare_flags(HTTP_FLAG);
  156. TStringBuf ExtHttpCodeStr(int code) noexcept {
  157. if (code < HTTP_CODE_MAX) {
  158. return HttpCodeStr(code);
  159. }
  160. switch (code) {
  161. case HTTP_BAD_RESPONSE_HEADER:
  162. return TStringBuf("Bad response header");
  163. case HTTP_CONNECTION_LOST:
  164. return TStringBuf("Connection lost");
  165. case HTTP_BODY_TOO_LARGE:
  166. return TStringBuf("Body too large");
  167. case HTTP_ROBOTS_TXT_DISALLOW:
  168. return TStringBuf("robots.txt disallow");
  169. case HTTP_BAD_URL:
  170. return TStringBuf("Bad url");
  171. case HTTP_BAD_MIME:
  172. return TStringBuf("Bad mime type");
  173. case HTTP_DNS_FAILURE:
  174. return TStringBuf("Dns failure");
  175. case HTTP_BAD_STATUS_CODE:
  176. return TStringBuf("Bad status code");
  177. case HTTP_BAD_HEADER_STRING:
  178. return TStringBuf("Bad header string");
  179. case HTTP_BAD_CHUNK:
  180. return TStringBuf("Bad chunk");
  181. case HTTP_CONNECT_FAILED:
  182. return TStringBuf("Connect failed");
  183. case HTTP_FILTER_DISALLOW:
  184. return TStringBuf("Filter disallow");
  185. case HTTP_LOCAL_EIO:
  186. return TStringBuf("Local eio");
  187. case HTTP_BAD_CONTENT_LENGTH:
  188. return TStringBuf("Bad content length");
  189. case HTTP_BAD_ENCODING:
  190. return TStringBuf("Bad encoding");
  191. case HTTP_LENGTH_UNKNOWN:
  192. return TStringBuf("Length unknown");
  193. case HTTP_HEADER_EOF:
  194. return TStringBuf("Header EOF");
  195. case HTTP_MESSAGE_EOF:
  196. return TStringBuf("Message EOF");
  197. case HTTP_CHUNK_EOF:
  198. return TStringBuf("Chunk EOF");
  199. case HTTP_PAST_EOF:
  200. return TStringBuf("Past EOF");
  201. case HTTP_HEADER_TOO_LARGE:
  202. return TStringBuf("Header is too large");
  203. case HTTP_URL_TOO_LARGE:
  204. return TStringBuf("Url is too large");
  205. case HTTP_INTERRUPTED:
  206. return TStringBuf("Interrupted");
  207. case HTTP_CUSTOM_NOT_MODIFIED:
  208. return TStringBuf("Signature detector thinks that doc is not modified");
  209. case HTTP_BAD_CONTENT_ENCODING:
  210. return TStringBuf("Bad content encoding");
  211. case HTTP_NO_RESOURCES:
  212. return TStringBuf("No resources");
  213. case HTTP_FETCHER_SHUTDOWN:
  214. return TStringBuf("Fetcher shutdown");
  215. case HTTP_CHUNK_TOO_LARGE:
  216. return TStringBuf("Chunk size is too big");
  217. case HTTP_SERVER_BUSY:
  218. return TStringBuf("Server is busy");
  219. case HTTP_SERVICE_UNKNOWN:
  220. return TStringBuf("Service is unknown");
  221. case HTTP_PROXY_UNKNOWN:
  222. return TStringBuf("Zora: unknown error");
  223. case HTTP_PROXY_REQUEST_TIME_OUT:
  224. return TStringBuf("Zora: request time out");
  225. case HTTP_PROXY_INTERNAL_ERROR:
  226. return TStringBuf("Zora: internal server error");
  227. case HTTP_PROXY_CONNECT_FAILED:
  228. return TStringBuf("Spider proxy connect failed");
  229. case HTTP_PROXY_CONNECTION_LOST:
  230. return TStringBuf("Spider proxy connection lost");
  231. case HTTP_PROXY_NO_PROXY:
  232. return TStringBuf("Spider proxy no proxy alive in region");
  233. case HTTP_PROXY_ERROR:
  234. return TStringBuf("Spider proxy returned custom error");
  235. case HTTP_SSL_ERROR:
  236. return TStringBuf("Ssl library returned error");
  237. case HTTP_CACHED_COPY_NOT_FOUND:
  238. return TStringBuf("Cached copy for the url is not available");
  239. case HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING:
  240. return TStringBuf("Timed out while bytes receiving");
  241. // TODO: messages for >2000 codes
  242. default:
  243. return TStringBuf("Unknown HTTP code");
  244. }
  245. }