123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266 |
- #include "exthttpcodes.h"
- #include <cstring>
- const ui16 CrazyServer = ShouldDelete | MarkSuspect;
- struct http_flag {
- ui16 http;
- ui16 flag;
- };
- static http_flag HTTP_FLAG[] = {
- {HTTP_CONTINUE, MarkSuspect},
- {HTTP_SWITCHING_PROTOCOLS, CrazyServer},
- {HTTP_PROCESSING, CrazyServer},
- {HTTP_OK, ShouldReindex},
- {HTTP_CREATED, CrazyServer},
- {HTTP_ACCEPTED, ShouldDelete},
- {HTTP_NON_AUTHORITATIVE_INFORMATION, ShouldReindex},
- {HTTP_NO_CONTENT, ShouldDelete},
- {HTTP_RESET_CONTENT, ShouldDelete},
- {HTTP_PARTIAL_CONTENT, ShouldReindex},
- {HTTP_MULTI_STATUS, CrazyServer},
- {HTTP_ALREADY_REPORTED, CrazyServer},
- {HTTP_IM_USED, CrazyServer},
- {HTTP_MULTIPLE_CHOICES, CheckLinks | ShouldDelete},
- {HTTP_MOVED_PERMANENTLY, CheckLocation | ShouldDelete | MoveRedir},
- {HTTP_FOUND, CheckLocation | ShouldDelete | MoveRedir},
- {HTTP_SEE_OTHER, CheckLocation | ShouldDelete | MoveRedir},
- {HTTP_NOT_MODIFIED, 0},
- {HTTP_USE_PROXY, ShouldDelete},
- {HTTP_TEMPORARY_REDIRECT, CheckLocation | ShouldDelete | MoveRedir},
- {HTTP_PERMANENT_REDIRECT, CheckLocation | ShouldDelete | MoveRedir},
- {HTTP_BAD_REQUEST, CrazyServer},
- {HTTP_UNAUTHORIZED, ShouldDelete},
- {HTTP_PAYMENT_REQUIRED, ShouldDelete},
- {HTTP_FORBIDDEN, ShouldDelete},
- {HTTP_NOT_FOUND, ShouldDelete},
- {HTTP_METHOD_NOT_ALLOWED, ShouldDelete},
- {HTTP_NOT_ACCEPTABLE, ShouldDelete},
- {HTTP_PROXY_AUTHENTICATION_REQUIRED, CrazyServer},
- {HTTP_REQUEST_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_CONFLICT, MarkSuspect},
- {HTTP_GONE, ShouldDelete},
- {HTTP_LENGTH_REQUIRED, CrazyServer},
- {HTTP_PRECONDITION_FAILED, CrazyServer},
- {HTTP_REQUEST_ENTITY_TOO_LARGE, CrazyServer},
- {HTTP_REQUEST_URI_TOO_LARGE, ShouldDelete},
- {HTTP_UNSUPPORTED_MEDIA_TYPE, CrazyServer},
- {HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, CrazyServer},
- {HTTP_EXPECTATION_FAILED, ShouldDelete},
- {HTTP_I_AM_A_TEAPOT, CrazyServer},
- {HTTP_AUTHENTICATION_TIMEOUT, ShouldDelete},
- {HTTP_MISDIRECTED_REQUEST, CrazyServer},
- {HTTP_UNPROCESSABLE_ENTITY, CrazyServer},
- {HTTP_LOCKED, ShouldDelete},
- {HTTP_FAILED_DEPENDENCY, CrazyServer},
- {HTTP_UPGRADE_REQUIRED, ShouldDelete},
- {HTTP_PRECONDITION_REQUIRED, ShouldDelete},
- {HTTP_TOO_MANY_REQUESTS, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_UNAVAILABLE_FOR_LEGAL_REASONS, ShouldDelete},
- {HTTP_INTERNAL_SERVER_ERROR, MarkSuspect},
- {HTTP_NOT_IMPLEMENTED, ShouldDelete | ShouldDisconnect},
- {HTTP_BAD_GATEWAY, MarkSuspect},
- {HTTP_SERVICE_UNAVAILABLE, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_GATEWAY_TIME_OUT, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_HTTP_VERSION_NOT_SUPPORTED, CrazyServer | ShouldDisconnect},
- {HTTP_VARIANT_ALSO_NEGOTIATES, CrazyServer | ShouldDisconnect},
- {HTTP_INSUFFICIENT_STORAGE, CrazyServer | ShouldDisconnect},
- {HTTP_LOOP_DETECTED, CrazyServer | ShouldDisconnect},
- {HTTP_BANDWIDTH_LIMIT_EXCEEDED, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_NOT_EXTENDED, ShouldDelete},
- {HTTP_NETWORK_AUTHENTICATION_REQUIRED, ShouldDelete},
-
- {HTTP_BAD_RESPONSE_HEADER, CrazyServer},
- {HTTP_CONNECTION_LOST, ShouldRetry},
- {HTTP_BODY_TOO_LARGE, ShouldDelete | CanBeFake},
- {HTTP_ROBOTS_TXT_DISALLOW, ShouldDelete},
- {HTTP_BAD_URL, ShouldDelete},
- {HTTP_BAD_MIME, ShouldDelete},
- {HTTP_DNS_FAILURE, ShouldDisconnect | MarkSuspect},
- {HTTP_BAD_STATUS_CODE, CrazyServer},
- {HTTP_BAD_HEADER_STRING, CrazyServer},
- {HTTP_BAD_CHUNK, CrazyServer},
- {HTTP_CONNECT_FAILED, ShouldDisconnect | ShouldRetry | MarkSuspect},
- {HTTP_FILTER_DISALLOW, ShouldDelete},
- {HTTP_LOCAL_EIO, ShouldRetry},
- {HTTP_BAD_CONTENT_LENGTH, ShouldDelete},
- {HTTP_BAD_ENCODING, ShouldDelete},
- {HTTP_LENGTH_UNKNOWN, ShouldDelete},
- {HTTP_HEADER_EOF, ShouldRetry | CanBeFake},
- {HTTP_MESSAGE_EOF, ShouldRetry | CanBeFake},
- {HTTP_CHUNK_EOF, ShouldRetry | CanBeFake},
- {HTTP_PAST_EOF, ShouldRetry | ShouldDelete | CanBeFake},
- {HTTP_HEADER_TOO_LARGE, ShouldDelete},
- {HTTP_URL_TOO_LARGE, ShouldDelete},
- {HTTP_INTERRUPTED, 0},
- {HTTP_CUSTOM_NOT_MODIFIED, 0},
- {HTTP_BAD_CONTENT_ENCODING, ShouldDelete},
- {HTTP_PROXY_UNKNOWN, 0},
- {HTTP_PROXY_REQUEST_TIME_OUT, 0},
- {HTTP_PROXY_INTERNAL_ERROR, 0},
- {HTTP_PROXY_CONNECT_FAILED, 0},
- {HTTP_PROXY_CONNECTION_LOST, 0},
- {HTTP_PROXY_NO_PROXY, 0},
- {HTTP_PROXY_ERROR, 0},
- {HTTP_SSL_ERROR, 0},
- {HTTP_CACHED_COPY_NOT_FOUND, 0},
- {HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING, ShouldRetry},
- {HTTP_FETCHER_BAD_RESPONSE, 0},
- {HTTP_FETCHER_MB_ERROR, 0},
- {HTTP_SSL_CERT_ERROR, 0},
-
- {EXT_HTTP_MIRRMOVE, 0},
- {EXT_HTTP_MANUAL_DELETE, ShouldDelete},
- {EXT_HTTP_NOTUSED2, ShouldDelete},
- {EXT_HTTP_NOTUSED3, ShouldDelete},
- {EXT_HTTP_REFRESH, ShouldDelete | CheckLinks | MoveRedir},
- {EXT_HTTP_NOINDEX, ShouldDelete | CheckLinks},
- {EXT_HTTP_BADCODES, ShouldDelete},
- {EXT_HTTP_SITESTAT, ShouldDelete},
- {EXT_HTTP_IOERROR, ShouldDelete},
- {EXT_HTTP_BASEERROR, ShouldDelete},
- {EXT_HTTP_PARSERROR, ShouldDelete | CanBeFake},
- {EXT_HTTP_BAD_CHARSET, ShouldDelete | CheckLinks},
- {EXT_HTTP_BAD_LANGUAGE, ShouldDelete | CheckLinks},
- {EXT_HTTP_NUMERERROR, ShouldDelete},
- {EXT_HTTP_EMPTYDOC, ShouldDelete | CheckLinks},
- {EXT_HTTP_HUGEDOC, ShouldDelete},
- {EXT_HTTP_LINKGARBAGE, ShouldDelete},
- {EXT_HTTP_PARSERFAIL, ShouldDelete},
- {EXT_HTTP_GZIPERROR, ShouldDelete},
- {EXT_HTTP_MANUAL_DELETE_URL, ShouldDelete},
- {EXT_HTTP_CUSTOM_PARTIAL_CONTENT, ShouldReindex},
- {EXT_HTTP_EMPTY_RESPONSE, ShouldDelete},
- {EXT_HTTP_REL_CANONICAL, ShouldDelete | CheckLinks | MoveRedir},
- {0, 0}};
- static ui16* prepare_flags(http_flag* arg) {
- static ui16 flags[EXT_HTTP_CODE_MAX];
- http_flag* ptr;
- size_t i;
-
- for (i = 0; i < EXT_HTTP_CODE_MAX; ++i)
- flags[i] = CrazyServer;
-
- for (ptr = arg; ptr->http; ++ptr)
- flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
-
-
- for (size_t group = 0; group < 1000; group += 100)
- for (size_t j = group + 1; j < group + 100; ++j)
- flags[j] = flags[group];
-
-
- for (ptr = arg; ptr->http; ++ptr)
- flags[ptr->http & (EXT_HTTP_CODE_MAX - 1)] = ptr->flag;
- return flags;
- }
- ui16* http2status = prepare_flags(HTTP_FLAG);
- TStringBuf ExtHttpCodeStr(int code) noexcept {
- if (code < HTTP_CODE_MAX) {
- return HttpCodeStr(code);
- }
- switch (code) {
- case HTTP_BAD_RESPONSE_HEADER:
- return TStringBuf("Bad response header");
- case HTTP_CONNECTION_LOST:
- return TStringBuf("Connection lost");
- case HTTP_BODY_TOO_LARGE:
- return TStringBuf("Body too large");
- case HTTP_ROBOTS_TXT_DISALLOW:
- return TStringBuf("robots.txt disallow");
- case HTTP_BAD_URL:
- return TStringBuf("Bad url");
- case HTTP_BAD_MIME:
- return TStringBuf("Bad mime type");
- case HTTP_DNS_FAILURE:
- return TStringBuf("Dns failure");
- case HTTP_BAD_STATUS_CODE:
- return TStringBuf("Bad status code");
- case HTTP_BAD_HEADER_STRING:
- return TStringBuf("Bad header string");
- case HTTP_BAD_CHUNK:
- return TStringBuf("Bad chunk");
- case HTTP_CONNECT_FAILED:
- return TStringBuf("Connect failed");
- case HTTP_FILTER_DISALLOW:
- return TStringBuf("Filter disallow");
- case HTTP_LOCAL_EIO:
- return TStringBuf("Local eio");
- case HTTP_BAD_CONTENT_LENGTH:
- return TStringBuf("Bad content length");
- case HTTP_BAD_ENCODING:
- return TStringBuf("Bad encoding");
- case HTTP_LENGTH_UNKNOWN:
- return TStringBuf("Length unknown");
- case HTTP_HEADER_EOF:
- return TStringBuf("Header EOF");
- case HTTP_MESSAGE_EOF:
- return TStringBuf("Message EOF");
- case HTTP_CHUNK_EOF:
- return TStringBuf("Chunk EOF");
- case HTTP_PAST_EOF:
- return TStringBuf("Past EOF");
- case HTTP_HEADER_TOO_LARGE:
- return TStringBuf("Header is too large");
- case HTTP_URL_TOO_LARGE:
- return TStringBuf("Url is too large");
- case HTTP_INTERRUPTED:
- return TStringBuf("Interrupted");
- case HTTP_CUSTOM_NOT_MODIFIED:
- return TStringBuf("Signature detector thinks that doc is not modified");
- case HTTP_BAD_CONTENT_ENCODING:
- return TStringBuf("Bad content encoding");
- case HTTP_NO_RESOURCES:
- return TStringBuf("No resources");
- case HTTP_FETCHER_SHUTDOWN:
- return TStringBuf("Fetcher shutdown");
- case HTTP_CHUNK_TOO_LARGE:
- return TStringBuf("Chunk size is too big");
- case HTTP_SERVER_BUSY:
- return TStringBuf("Server is busy");
- case HTTP_SERVICE_UNKNOWN:
- return TStringBuf("Service is unknown");
- case HTTP_PROXY_UNKNOWN:
- return TStringBuf("Zora: unknown error");
- case HTTP_PROXY_REQUEST_TIME_OUT:
- return TStringBuf("Zora: request time out");
- case HTTP_PROXY_INTERNAL_ERROR:
- return TStringBuf("Zora: internal server error");
- case HTTP_PROXY_CONNECT_FAILED:
- return TStringBuf("Spider proxy connect failed");
- case HTTP_PROXY_CONNECTION_LOST:
- return TStringBuf("Spider proxy connection lost");
- case HTTP_PROXY_NO_PROXY:
- return TStringBuf("Spider proxy no proxy alive in region");
- case HTTP_PROXY_ERROR:
- return TStringBuf("Spider proxy returned custom error");
- case HTTP_SSL_ERROR:
- return TStringBuf("Ssl library returned error");
- case HTTP_CACHED_COPY_NOT_FOUND:
- return TStringBuf("Cached copy for the url is not available");
- case HTTP_TIMEDOUT_WHILE_BYTES_RECEIVING:
- return TStringBuf("Timed out while bytes receiving");
-
- default:
- return TStringBuf("Unknown HTTP code");
- }
- }
|