socket.cpp 32 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297
  1. #include "ip.h"
  2. #include "socket.h"
  3. #include "address.h"
  4. #include "pollerimpl.h"
  5. #include "iovec.h"
  6. #include <util/system/defaults.h>
  7. #include <util/system/byteorder.h>
  8. #if defined(_unix_)
  9. #include <netdb.h>
  10. #include <sys/types.h>
  11. #include <sys/socket.h>
  12. #include <sys/un.h>
  13. #include <sys/ioctl.h>
  14. #include <netinet/in.h>
  15. #include <netinet/tcp.h>
  16. #include <arpa/inet.h>
  17. #endif
  18. #if defined(_freebsd_)
  19. #include <sys/module.h>
  20. #define ACCEPT_FILTER_MOD
  21. #include <sys/socketvar.h>
  22. #endif
  23. #if defined(_win_)
  24. #include <cerrno>
  25. #include <winsock2.h>
  26. #include <ws2tcpip.h>
  27. #include <wspiapi.h>
  28. #include <util/system/compat.h>
  29. #endif
  30. #include <util/generic/ylimits.h>
  31. #include <util/string/cast.h>
  32. #include <util/stream/mem.h>
  33. #include <util/system/datetime.h>
  34. #include <util/system/error.h>
  35. #include <util/memory/tempbuf.h>
  36. #include <util/generic/singleton.h>
  37. #include <util/generic/hash_set.h>
  38. #include <stddef.h>
  39. #include <sys/uio.h>
  40. using namespace NAddr;
  41. #if defined(_win_)
  42. int inet_aton(const char* cp, struct in_addr* inp) {
  43. sockaddr_in addr;
  44. addr.sin_family = AF_INET;
  45. int psz = sizeof(addr);
  46. if (0 == WSAStringToAddress((char*)cp, AF_INET, nullptr, (LPSOCKADDR)&addr, &psz)) {
  47. memcpy(inp, &addr.sin_addr, sizeof(in_addr));
  48. return 1;
  49. }
  50. return 0;
  51. }
  52. #if (_WIN32_WINNT < 0x0600)
  53. const char* inet_ntop(int af, const void* src, char* dst, socklen_t size) {
  54. if (af != AF_INET) {
  55. errno = EINVAL;
  56. return 0;
  57. }
  58. const ui8* ia = (ui8*)src;
  59. if (snprintf(dst, size, "%u.%u.%u.%u", ia[0], ia[1], ia[2], ia[3]) >= (int)size) {
  60. errno = ENOSPC;
  61. return 0;
  62. }
  63. return dst;
  64. }
  65. struct evpair {
  66. int event;
  67. int winevent;
  68. };
  69. static const evpair evpairs_to_win[] = {
  70. {POLLIN, FD_READ | FD_CLOSE | FD_ACCEPT},
  71. {POLLRDNORM, FD_READ | FD_CLOSE | FD_ACCEPT},
  72. {POLLRDBAND, -1},
  73. {POLLPRI, -1},
  74. {POLLOUT, FD_WRITE | FD_CLOSE},
  75. {POLLWRNORM, FD_WRITE | FD_CLOSE},
  76. {POLLWRBAND, -1},
  77. {POLLERR, 0},
  78. {POLLHUP, 0},
  79. {POLLNVAL, 0}};
  80. static const size_t nevpairs_to_win = sizeof(evpairs_to_win) / sizeof(evpairs_to_win[0]);
  81. static const evpair evpairs_to_unix[] = {
  82. {FD_ACCEPT, POLLIN | POLLRDNORM},
  83. {FD_READ, POLLIN | POLLRDNORM},
  84. {FD_WRITE, POLLOUT | POLLWRNORM},
  85. {FD_CLOSE, POLLHUP},
  86. };
  87. static const size_t nevpairs_to_unix = sizeof(evpairs_to_unix) / sizeof(evpairs_to_unix[0]);
  88. static int convert_events(int events, const evpair* evpairs, size_t nevpairs, bool ignoreUnknown) noexcept {
  89. int result = 0;
  90. for (size_t i = 0; i < nevpairs; ++i) {
  91. int event = evpairs[i].event;
  92. if (events & event) {
  93. events ^= event;
  94. long winEvent = evpairs[i].winevent;
  95. if (winEvent == -1)
  96. return -1;
  97. if (winEvent == 0)
  98. continue;
  99. result |= winEvent;
  100. }
  101. }
  102. if (events != 0 && !ignoreUnknown)
  103. return -1;
  104. return result;
  105. }
  106. class TWSAEventHolder {
  107. private:
  108. HANDLE Event;
  109. public:
  110. inline TWSAEventHolder(HANDLE event) noexcept
  111. : Event(event)
  112. {
  113. }
  114. inline ~TWSAEventHolder() {
  115. WSACloseEvent(Event);
  116. }
  117. inline HANDLE Get() noexcept {
  118. return Event;
  119. }
  120. };
  121. int poll(struct pollfd fds[], nfds_t nfds, int timeout) noexcept {
  122. HANDLE rawEvent = WSACreateEvent();
  123. if (rawEvent == WSA_INVALID_EVENT) {
  124. errno = EIO;
  125. return -1;
  126. }
  127. TWSAEventHolder event(rawEvent);
  128. int checked_sockets = 0;
  129. for (pollfd* fd = fds; fd < fds + nfds; ++fd) {
  130. int win_events = convert_events(fd->events, evpairs_to_win, nevpairs_to_win, false);
  131. if (win_events == -1) {
  132. errno = EINVAL;
  133. return -1;
  134. }
  135. fd->revents = 0;
  136. if (WSAEventSelect(fd->fd, event.Get(), win_events)) {
  137. int error = WSAGetLastError();
  138. if (error == WSAEINVAL || error == WSAENOTSOCK) {
  139. fd->revents = POLLNVAL;
  140. ++checked_sockets;
  141. } else {
  142. errno = EIO;
  143. return -1;
  144. }
  145. }
  146. fd_set readfds;
  147. fd_set writefds;
  148. struct timeval timeout = {0, 0};
  149. FD_ZERO(&readfds);
  150. FD_ZERO(&writefds);
  151. if (fd->events & POLLIN) {
  152. FD_SET(fd->fd, &readfds);
  153. }
  154. if (fd->events & POLLOUT) {
  155. FD_SET(fd->fd, &writefds);
  156. }
  157. int error = select(0, &readfds, &writefds, nullptr, &timeout);
  158. if (error > 0) {
  159. if (FD_ISSET(fd->fd, &readfds)) {
  160. fd->revents |= POLLIN;
  161. }
  162. if (FD_ISSET(fd->fd, &writefds)) {
  163. fd->revents |= POLLOUT;
  164. }
  165. ++checked_sockets;
  166. }
  167. }
  168. if (checked_sockets > 0) {
  169. // returns without wait since we already have sockets in desired conditions
  170. return checked_sockets;
  171. }
  172. HANDLE events[] = {event.Get()};
  173. DWORD wait_result = WSAWaitForMultipleEvents(1, events, TRUE, timeout, FALSE);
  174. if (wait_result == WSA_WAIT_TIMEOUT)
  175. return 0;
  176. else if (wait_result == WSA_WAIT_EVENT_0) {
  177. for (pollfd* fd = fds; fd < fds + nfds; ++fd) {
  178. if (fd->revents == POLLNVAL)
  179. continue;
  180. WSANETWORKEVENTS network_events;
  181. if (WSAEnumNetworkEvents(fd->fd, event.Get(), &network_events)) {
  182. errno = EIO;
  183. return -1;
  184. }
  185. fd->revents = 0;
  186. for (int i = 0; i < FD_MAX_EVENTS; ++i) {
  187. if ((network_events.lNetworkEvents & (1 << i)) != 0 && network_events.iErrorCode[i]) {
  188. fd->revents = POLLERR;
  189. break;
  190. }
  191. }
  192. if (fd->revents == POLLERR)
  193. continue;
  194. if (network_events.lNetworkEvents) {
  195. fd->revents = static_cast<short>(convert_events(network_events.lNetworkEvents, evpairs_to_unix, nevpairs_to_unix, true));
  196. if (fd->revents & POLLHUP) {
  197. fd->revents &= POLLHUP | POLLIN | POLLRDNORM;
  198. }
  199. }
  200. }
  201. int chanded_sockets = 0;
  202. for (pollfd* fd = fds; fd < fds + nfds; ++fd)
  203. if (fd->revents != 0)
  204. ++chanded_sockets;
  205. return chanded_sockets;
  206. } else {
  207. errno = EIO;
  208. return -1;
  209. }
  210. }
  211. #endif
  212. #endif
  213. bool GetRemoteAddr(SOCKET Socket, char* str, socklen_t size) {
  214. if (!size) {
  215. return false;
  216. }
  217. TOpaqueAddr addr;
  218. if (getpeername(Socket, addr.MutableAddr(), addr.LenPtr()) != 0) {
  219. return false;
  220. }
  221. try {
  222. TMemoryOutput out(str, size - 1);
  223. PrintHost(out, addr);
  224. *out.Buf() = 0;
  225. return true;
  226. } catch (...) {
  227. // ¯\_(ツ)_/¯
  228. }
  229. return false;
  230. }
  231. void SetSocketTimeout(SOCKET s, long timeout) {
  232. SetSocketTimeout(s, timeout, 0);
  233. }
  234. void SetSocketTimeout(SOCKET s, long sec, long msec) {
  235. #ifdef SO_SNDTIMEO
  236. #ifdef _darwin_
  237. const timeval timeout = {sec, (__darwin_suseconds_t)msec * 1000};
  238. #elif defined(_unix_)
  239. const timeval timeout = {sec, msec * 1000};
  240. #else
  241. const int timeout = sec * 1000 + msec;
  242. #endif
  243. CheckedSetSockOpt(s, SOL_SOCKET, SO_RCVTIMEO, timeout, "recv timeout");
  244. CheckedSetSockOpt(s, SOL_SOCKET, SO_SNDTIMEO, timeout, "send timeout");
  245. #endif
  246. }
  247. void SetLinger(SOCKET s, bool on, unsigned len) {
  248. #ifdef SO_LINGER
  249. struct linger l = {on, (u_short)len};
  250. CheckedSetSockOpt(s, SOL_SOCKET, SO_LINGER, l, "linger");
  251. #endif
  252. }
  253. void SetZeroLinger(SOCKET s) {
  254. SetLinger(s, 1, 0);
  255. }
  256. void SetKeepAlive(SOCKET s, bool value) {
  257. CheckedSetSockOpt(s, SOL_SOCKET, SO_KEEPALIVE, (int)value, "keepalive");
  258. }
  259. void SetOutputBuffer(SOCKET s, unsigned value) {
  260. CheckedSetSockOpt(s, SOL_SOCKET, SO_SNDBUF, value, "output buffer");
  261. }
  262. void SetInputBuffer(SOCKET s, unsigned value) {
  263. CheckedSetSockOpt(s, SOL_SOCKET, SO_RCVBUF, value, "input buffer");
  264. }
  265. #if defined(_linux_) && !defined(SO_REUSEPORT)
  266. #define SO_REUSEPORT 15
  267. #endif
  268. void SetReusePort(SOCKET s, bool value) {
  269. #if defined(SO_REUSEPORT)
  270. CheckedSetSockOpt(s, SOL_SOCKET, SO_REUSEPORT, (int)value, "reuse port");
  271. #else
  272. Y_UNUSED(s);
  273. Y_UNUSED(value);
  274. ythrow TSystemError(ENOSYS) << "SO_REUSEPORT is not defined";
  275. #endif
  276. }
  277. void SetNoDelay(SOCKET s, bool value) {
  278. CheckedSetSockOpt(s, IPPROTO_TCP, TCP_NODELAY, (int)value, "tcp no delay");
  279. }
  280. void SetCloseOnExec(SOCKET s, bool value) {
  281. #if defined(_unix_)
  282. int flags = fcntl(s, F_GETFD);
  283. if (flags == -1) {
  284. ythrow TSystemError() << "fcntl() failed";
  285. }
  286. if (value) {
  287. flags |= FD_CLOEXEC;
  288. } else {
  289. flags &= ~FD_CLOEXEC;
  290. }
  291. if (fcntl(s, F_SETFD, flags) == -1) {
  292. ythrow TSystemError() << "fcntl() failed";
  293. }
  294. #else
  295. Y_UNUSED(s);
  296. Y_UNUSED(value);
  297. #endif
  298. }
  299. size_t GetMaximumSegmentSize(SOCKET s) {
  300. #if defined(TCP_MAXSEG)
  301. int val;
  302. if (GetSockOpt(s, IPPROTO_TCP, TCP_MAXSEG, val) == 0) {
  303. return (size_t)val;
  304. }
  305. #endif
  306. /*
  307. * probably a good guess...
  308. */
  309. return 8192;
  310. }
  311. size_t GetMaximumTransferUnit(SOCKET /*s*/) {
  312. // for someone who'll dare to write it
  313. // Linux: there rummored to be IP_MTU getsockopt() request
  314. // FreeBSD: request to a socket of type PF_ROUTE
  315. // with peer address as a destination argument
  316. return 8192;
  317. }
  318. int GetSocketToS(SOCKET s) {
  319. TOpaqueAddr addr;
  320. if (getsockname(s, addr.MutableAddr(), addr.LenPtr()) < 0) {
  321. ythrow TSystemError() << "getsockname() failed";
  322. }
  323. return GetSocketToS(s, &addr);
  324. }
  325. int GetSocketToS(SOCKET s, const IRemoteAddr* addr) {
  326. int result = 0;
  327. switch (addr->Addr()->sa_family) {
  328. case AF_INET:
  329. CheckedGetSockOpt(s, IPPROTO_IP, IP_TOS, result, "tos");
  330. break;
  331. case AF_INET6:
  332. #ifdef IPV6_TCLASS
  333. CheckedGetSockOpt(s, IPPROTO_IPV6, IPV6_TCLASS, result, "tos");
  334. #endif
  335. break;
  336. }
  337. return result;
  338. }
  339. void SetSocketToS(SOCKET s, const NAddr::IRemoteAddr* addr, int tos) {
  340. switch (addr->Addr()->sa_family) {
  341. case AF_INET:
  342. CheckedSetSockOpt(s, IPPROTO_IP, IP_TOS, tos, "tos");
  343. return;
  344. case AF_INET6:
  345. #ifdef IPV6_TCLASS
  346. CheckedSetSockOpt(s, IPPROTO_IPV6, IPV6_TCLASS, tos, "tos");
  347. return;
  348. #endif
  349. break;
  350. }
  351. ythrow yexception() << "SetSocketToS unsupported for family " << addr->Addr()->sa_family;
  352. }
  353. void SetSocketToS(SOCKET s, int tos) {
  354. TOpaqueAddr addr;
  355. if (getsockname(s, addr.MutableAddr(), addr.LenPtr()) < 0) {
  356. ythrow TSystemError() << "getsockname() failed";
  357. }
  358. SetSocketToS(s, &addr, tos);
  359. }
  360. void SetSocketPriority(SOCKET s, int priority) {
  361. #if defined(SO_PRIORITY)
  362. CheckedSetSockOpt(s, SOL_SOCKET, SO_PRIORITY, priority, "priority");
  363. #else
  364. Y_UNUSED(s);
  365. Y_UNUSED(priority);
  366. #endif
  367. }
  368. bool HasLocalAddress(SOCKET socket) {
  369. TOpaqueAddr localAddr;
  370. if (getsockname(socket, localAddr.MutableAddr(), localAddr.LenPtr()) != 0) {
  371. ythrow TSystemError() << "HasLocalAddress: getsockname() failed. ";
  372. }
  373. if (IsLoopback(localAddr)) {
  374. return true;
  375. }
  376. TOpaqueAddr remoteAddr;
  377. if (getpeername(socket, remoteAddr.MutableAddr(), remoteAddr.LenPtr()) != 0) {
  378. ythrow TSystemError() << "HasLocalAddress: getpeername() failed. ";
  379. }
  380. return IsSame(localAddr, remoteAddr);
  381. }
  382. namespace {
  383. #if defined(_linux_)
  384. #if !defined(TCP_FASTOPEN)
  385. #define TCP_FASTOPEN 23
  386. #endif
  387. #endif
  388. #if defined(TCP_FASTOPEN)
  389. struct TTcpFastOpenFeature {
  390. inline TTcpFastOpenFeature()
  391. : HasFastOpen_(false)
  392. {
  393. TSocketHolder tmp(socket(AF_INET, SOCK_STREAM, 0));
  394. int val = 1;
  395. int ret = SetSockOpt(tmp, IPPROTO_TCP, TCP_FASTOPEN, val);
  396. HasFastOpen_ = (ret == 0);
  397. }
  398. inline void SetFastOpen(SOCKET s, int qlen) const {
  399. if (HasFastOpen_) {
  400. CheckedSetSockOpt(s, IPPROTO_TCP, TCP_FASTOPEN, qlen, "setting TCP_FASTOPEN");
  401. }
  402. }
  403. static inline const TTcpFastOpenFeature* Instance() noexcept {
  404. return Singleton<TTcpFastOpenFeature>();
  405. }
  406. bool HasFastOpen_;
  407. };
  408. #endif
  409. }
  410. void SetTcpFastOpen(SOCKET s, int qlen) {
  411. #if defined(TCP_FASTOPEN)
  412. TTcpFastOpenFeature::Instance()->SetFastOpen(s, qlen);
  413. #else
  414. Y_UNUSED(s);
  415. Y_UNUSED(qlen);
  416. #endif
  417. }
  418. static bool IsBlocked(int lasterr) noexcept {
  419. return lasterr == EAGAIN || lasterr == EWOULDBLOCK;
  420. }
  421. struct TUnblockingGuard {
  422. SOCKET S_;
  423. TUnblockingGuard(SOCKET s)
  424. : S_(s)
  425. {
  426. SetNonBlock(S_, true);
  427. }
  428. ~TUnblockingGuard() {
  429. SetNonBlock(S_, false);
  430. }
  431. };
  432. static int MsgPeek(SOCKET s) {
  433. int flags = MSG_PEEK;
  434. #if defined(_win_)
  435. TUnblockingGuard unblocker(s);
  436. Y_UNUSED(unblocker);
  437. #else
  438. flags |= MSG_DONTWAIT;
  439. #endif
  440. char c;
  441. return recv(s, &c, 1, flags);
  442. }
  443. bool IsNotSocketClosedByOtherSide(SOCKET s) {
  444. return HasSocketDataToRead(s) != ESocketReadStatus::SocketClosed;
  445. }
  446. ESocketReadStatus HasSocketDataToRead(SOCKET s) {
  447. const int r = MsgPeek(s);
  448. if (r == -1 && IsBlocked(LastSystemError())) {
  449. return ESocketReadStatus::NoData;
  450. }
  451. if (r > 0) {
  452. return ESocketReadStatus::HasData;
  453. }
  454. return ESocketReadStatus::SocketClosed;
  455. }
  456. #if defined(_win_)
  457. static ssize_t DoSendMsg(SOCKET sock, const struct iovec* iov, int iovcnt) {
  458. return writev(sock, iov, iovcnt);
  459. }
  460. #else
  461. static ssize_t DoSendMsg(SOCKET sock, const struct iovec* iov, int iovcnt) {
  462. struct msghdr message;
  463. Zero(message);
  464. message.msg_iov = const_cast<struct iovec*>(iov);
  465. message.msg_iovlen = iovcnt;
  466. return sendmsg(sock, &message, MSG_NOSIGNAL);
  467. }
  468. #endif
  469. void TSocketHolder::Close() noexcept {
  470. if (Fd_ != INVALID_SOCKET) {
  471. bool ok = (closesocket(Fd_) == 0);
  472. if (!ok) {
  473. // Do not quietly close bad descriptor,
  474. // because often it means double close
  475. // that is disasterous
  476. #ifdef _win_
  477. Y_ABORT_UNLESS(WSAGetLastError() != WSAENOTSOCK, "must not quietly close bad socket descriptor");
  478. #elif defined(_unix_)
  479. Y_ABORT_UNLESS(errno != EBADF, "must not quietly close bad descriptor: fd=%d", int(Fd_));
  480. #else
  481. #error unsupported platform
  482. #endif
  483. }
  484. Fd_ = INVALID_SOCKET;
  485. }
  486. }
  487. class TSocket::TImpl: public TAtomicRefCount<TImpl> {
  488. using TOps = TSocket::TOps;
  489. public:
  490. inline TImpl(SOCKET fd, TOps* ops)
  491. : Fd_(fd)
  492. , Ops_(ops)
  493. {
  494. }
  495. inline ~TImpl() = default;
  496. inline SOCKET Fd() const noexcept {
  497. return Fd_;
  498. }
  499. inline ssize_t Send(const void* data, size_t len) {
  500. return Ops_->Send(Fd_, data, len);
  501. }
  502. inline ssize_t Recv(void* buf, size_t len) {
  503. return Ops_->Recv(Fd_, buf, len);
  504. }
  505. inline ssize_t SendV(const TPart* parts, size_t count) {
  506. return Ops_->SendV(Fd_, parts, count);
  507. }
  508. inline void Close() {
  509. Fd_.Close();
  510. }
  511. private:
  512. TSocketHolder Fd_;
  513. TOps* Ops_;
  514. };
  515. template <>
  516. void Out<const struct addrinfo*>(IOutputStream& os, const struct addrinfo* ai) {
  517. if (ai->ai_flags & AI_CANONNAME) {
  518. os << "`" << ai->ai_canonname << "' ";
  519. }
  520. os << '[';
  521. for (int i = 0; ai; ++i, ai = ai->ai_next) {
  522. if (i > 0) {
  523. os << ", ";
  524. }
  525. os << (const IRemoteAddr&)TAddrInfo(ai);
  526. }
  527. os << ']';
  528. }
  529. template <>
  530. void Out<struct addrinfo*>(IOutputStream& os, struct addrinfo* ai) {
  531. Out<const struct addrinfo*>(os, static_cast<const struct addrinfo*>(ai));
  532. }
  533. template <>
  534. void Out<TNetworkAddress>(IOutputStream& os, const TNetworkAddress& addr) {
  535. os << &*addr.Begin();
  536. }
  537. static inline const struct addrinfo* Iterate(const struct addrinfo* addr, const struct addrinfo* addr0, const int sockerr) {
  538. if (addr->ai_next) {
  539. return addr->ai_next;
  540. }
  541. ythrow TSystemError(sockerr) << "can not connect to " << addr0;
  542. }
  543. static inline SOCKET DoConnectImpl(const struct addrinfo* res, const TInstant& deadLine) {
  544. const struct addrinfo* addr0 = res;
  545. while (res) {
  546. TSocketHolder s(socket(res->ai_family, res->ai_socktype, res->ai_protocol));
  547. if (s.Closed()) {
  548. res = Iterate(res, addr0, LastSystemError());
  549. continue;
  550. }
  551. SetNonBlock(s, true);
  552. if (connect(s, res->ai_addr, (int)res->ai_addrlen)) {
  553. int err = LastSystemError();
  554. if (err == EINPROGRESS || err == EAGAIN || err == EWOULDBLOCK) {
  555. /*
  556. * must wait
  557. */
  558. struct pollfd p = {
  559. (SOCKET)s,
  560. POLLOUT,
  561. 0};
  562. const ssize_t n = PollD(&p, 1, deadLine);
  563. /*
  564. * timeout occured
  565. */
  566. if (n < 0) {
  567. ythrow TSystemError(-(int)n) << "can not connect";
  568. }
  569. CheckedGetSockOpt(s, SOL_SOCKET, SO_ERROR, err, "socket error");
  570. if (!err) {
  571. return s.Release();
  572. }
  573. }
  574. res = Iterate(res, addr0, err);
  575. continue;
  576. }
  577. return s.Release();
  578. }
  579. ythrow yexception() << "something went wrong: nullptr at addrinfo";
  580. }
  581. static inline SOCKET DoConnect(const struct addrinfo* res, const TInstant& deadLine) {
  582. TSocketHolder ret(DoConnectImpl(res, deadLine));
  583. SetNonBlock(ret, false);
  584. return ret.Release();
  585. }
  586. static inline ssize_t DoSendV(SOCKET fd, const struct iovec* iov, size_t count) {
  587. ssize_t ret = -1;
  588. do {
  589. ret = DoSendMsg(fd, iov, (int)count);
  590. } while (ret == -1 && errno == EINTR);
  591. if (ret < 0) {
  592. return -LastSystemError();
  593. }
  594. return ret;
  595. }
  596. template <bool isCompat>
  597. struct TSender {
  598. using TPart = TSocket::TPart;
  599. static inline ssize_t SendV(SOCKET fd, const TPart* parts, size_t count) {
  600. return DoSendV(fd, (const iovec*)parts, count);
  601. }
  602. };
  603. template <>
  604. struct TSender<false> {
  605. using TPart = TSocket::TPart;
  606. static inline ssize_t SendV(SOCKET fd, const TPart* parts, size_t count) {
  607. TTempBuf tempbuf(sizeof(struct iovec) * count);
  608. struct iovec* iov = (struct iovec*)tempbuf.Data();
  609. for (size_t i = 0; i < count; ++i) {
  610. struct iovec& io = iov[i];
  611. const TPart& part = parts[i];
  612. io.iov_base = (char*)part.buf;
  613. io.iov_len = part.len;
  614. }
  615. return DoSendV(fd, iov, count);
  616. }
  617. };
  618. class TCommonSockOps: public TSocket::TOps {
  619. using TPart = TSocket::TPart;
  620. public:
  621. inline TCommonSockOps() noexcept {
  622. }
  623. ~TCommonSockOps() override = default;
  624. ssize_t Send(SOCKET fd, const void* data, size_t len) override {
  625. ssize_t ret = -1;
  626. do {
  627. ret = send(fd, (const char*)data, (int)len, MSG_NOSIGNAL);
  628. } while (ret == -1 && errno == EINTR);
  629. if (ret < 0) {
  630. return -LastSystemError();
  631. }
  632. return ret;
  633. }
  634. ssize_t Recv(SOCKET fd, void* buf, size_t len) override {
  635. ssize_t ret = -1;
  636. do {
  637. ret = recv(fd, (char*)buf, (int)len, 0);
  638. } while (ret == -1 && errno == EINTR);
  639. if (ret < 0) {
  640. return -LastSystemError();
  641. }
  642. return ret;
  643. }
  644. ssize_t SendV(SOCKET fd, const TPart* parts, size_t count) override {
  645. ssize_t ret = SendVImpl(fd, parts, count);
  646. if (ret < 0) {
  647. return ret;
  648. }
  649. size_t len = TContIOVector::Bytes(parts, count);
  650. if ((size_t)ret == len) {
  651. return ret;
  652. }
  653. return SendVPartial(fd, parts, count, ret);
  654. }
  655. inline ssize_t SendVImpl(SOCKET fd, const TPart* parts, size_t count) {
  656. return TSender < (sizeof(iovec) == sizeof(TPart)) && (offsetof(iovec, iov_base) == offsetof(TPart, buf)) && (offsetof(iovec, iov_len) == offsetof(TPart, len)) > ::SendV(fd, parts, count);
  657. }
  658. ssize_t SendVPartial(SOCKET fd, const TPart* constParts, size_t count, size_t written);
  659. };
  660. ssize_t TCommonSockOps::SendVPartial(SOCKET fd, const TPart* constParts, size_t count, size_t written) {
  661. TTempBuf tempbuf(sizeof(TPart) * count);
  662. TPart* parts = (TPart*)tempbuf.Data();
  663. for (size_t i = 0; i < count; ++i) {
  664. parts[i] = constParts[i];
  665. }
  666. TContIOVector vec(parts, count);
  667. vec.Proceed(written);
  668. while (!vec.Complete()) {
  669. ssize_t ret = SendVImpl(fd, vec.Parts(), vec.Count());
  670. if (ret < 0) {
  671. return ret;
  672. }
  673. written += ret;
  674. vec.Proceed((size_t)ret);
  675. }
  676. return written;
  677. }
  678. static inline TSocket::TOps* GetCommonSockOps() noexcept {
  679. return Singleton<TCommonSockOps>();
  680. }
  681. TSocket::TSocket()
  682. : Impl_(new TImpl(INVALID_SOCKET, GetCommonSockOps()))
  683. {
  684. }
  685. TSocket::TSocket(SOCKET fd)
  686. : Impl_(new TImpl(fd, GetCommonSockOps()))
  687. {
  688. }
  689. TSocket::TSocket(SOCKET fd, TOps* ops)
  690. : Impl_(new TImpl(fd, ops))
  691. {
  692. }
  693. TSocket::TSocket(const TNetworkAddress& addr)
  694. : Impl_(new TImpl(DoConnect(addr.Info(), TInstant::Max()), GetCommonSockOps()))
  695. {
  696. }
  697. TSocket::TSocket(const TNetworkAddress& addr, const TDuration& timeOut)
  698. : Impl_(new TImpl(DoConnect(addr.Info(), timeOut.ToDeadLine()), GetCommonSockOps()))
  699. {
  700. }
  701. TSocket::TSocket(const TNetworkAddress& addr, const TInstant& deadLine)
  702. : Impl_(new TImpl(DoConnect(addr.Info(), deadLine), GetCommonSockOps()))
  703. {
  704. }
  705. TSocket::~TSocket() = default;
  706. SOCKET TSocket::Fd() const noexcept {
  707. return Impl_->Fd();
  708. }
  709. ssize_t TSocket::Send(const void* data, size_t len) {
  710. return Impl_->Send(data, len);
  711. }
  712. ssize_t TSocket::Recv(void* buf, size_t len) {
  713. return Impl_->Recv(buf, len);
  714. }
  715. ssize_t TSocket::SendV(const TPart* parts, size_t count) {
  716. return Impl_->SendV(parts, count);
  717. }
  718. void TSocket::Close() {
  719. Impl_->Close();
  720. }
  721. TSocketInput::TSocketInput(const TSocket& s) noexcept
  722. : S_(s)
  723. {
  724. }
  725. TSocketInput::~TSocketInput() = default;
  726. size_t TSocketInput::DoRead(void* buf, size_t len) {
  727. const ssize_t ret = S_.Recv(buf, len);
  728. if (ret >= 0) {
  729. return (size_t)ret;
  730. }
  731. ythrow TSystemError(-(int)ret) << "can not read from socket input stream";
  732. }
  733. TSocketOutput::TSocketOutput(const TSocket& s) noexcept
  734. : S_(s)
  735. {
  736. }
  737. TSocketOutput::~TSocketOutput() {
  738. try {
  739. Finish();
  740. } catch (...) {
  741. // ¯\_(ツ)_/¯
  742. }
  743. }
  744. void TSocketOutput::DoWrite(const void* buf, size_t len) {
  745. size_t send = 0;
  746. while (len) {
  747. const ssize_t ret = S_.Send(buf, len);
  748. if (ret < 0) {
  749. ythrow TSystemError(-(int)ret) << "can not write to socket output stream; " << send << " bytes already send";
  750. }
  751. buf = (const char*)buf + ret;
  752. len -= ret;
  753. send += ret;
  754. }
  755. }
  756. void TSocketOutput::DoWriteV(const TPart* parts, size_t count) {
  757. const ssize_t ret = S_.SendV(parts, count);
  758. if (ret < 0) {
  759. ythrow TSystemError(-(int)ret) << "can not writev to socket output stream";
  760. }
  761. /*
  762. * todo for nonblocking sockets?
  763. */
  764. }
  765. namespace {
  766. //https://bugzilla.mozilla.org/attachment.cgi?id=503263&action=diff
  767. struct TLocalNames: public THashSet<TStringBuf> {
  768. inline TLocalNames() {
  769. insert("localhost");
  770. insert("localhost.localdomain");
  771. insert("localhost6");
  772. insert("localhost6.localdomain6");
  773. insert("::1");
  774. }
  775. inline bool IsLocalName(const char* name) const noexcept {
  776. struct sockaddr_in sa;
  777. memset(&sa, 0, sizeof(sa));
  778. if (inet_pton(AF_INET, name, &(sa.sin_addr)) == 1) {
  779. return (InetToHost(sa.sin_addr.s_addr) >> 24) == 127;
  780. }
  781. return contains(name);
  782. }
  783. };
  784. }
  785. class TNetworkAddress::TImpl: public TAtomicRefCount<TImpl> {
  786. private:
  787. class TAddrInfoDeleter {
  788. public:
  789. TAddrInfoDeleter(bool useFreeAddrInfo = true)
  790. : UseFreeAddrInfo_(useFreeAddrInfo)
  791. {
  792. }
  793. void operator()(struct addrinfo* ai) noexcept {
  794. if (!UseFreeAddrInfo_ && ai != NULL) {
  795. if (ai->ai_addr != NULL) {
  796. free(ai->ai_addr);
  797. }
  798. struct addrinfo* p;
  799. while (ai != NULL) {
  800. p = ai;
  801. ai = ai->ai_next;
  802. free(p->ai_canonname);
  803. free(p);
  804. }
  805. } else if (ai != NULL) {
  806. freeaddrinfo(ai);
  807. }
  808. }
  809. private:
  810. bool UseFreeAddrInfo_ = true;
  811. };
  812. public:
  813. inline TImpl(const char* host, ui16 port, int flags)
  814. : Info_(nullptr, TAddrInfoDeleter{})
  815. {
  816. const TString port_st(ToString(port));
  817. struct addrinfo hints;
  818. memset(&hints, 0, sizeof(hints));
  819. hints.ai_flags = flags;
  820. hints.ai_family = PF_UNSPEC;
  821. hints.ai_socktype = SOCK_STREAM;
  822. if (!host) {
  823. hints.ai_flags |= AI_PASSIVE;
  824. } else {
  825. if (!Singleton<TLocalNames>()->IsLocalName(host)) {
  826. hints.ai_flags |= AI_ADDRCONFIG;
  827. }
  828. }
  829. struct addrinfo* pai = NULL;
  830. const int error = getaddrinfo(host, port_st.data(), &hints, &pai);
  831. if (error) {
  832. TAddrInfoDeleter()(pai);
  833. ythrow TNetworkResolutionError(error) << ": can not resolve " << host << ":" << port;
  834. }
  835. Info_.reset(pai);
  836. }
  837. inline TImpl(const char* path, int flags)
  838. : Info_(nullptr, TAddrInfoDeleter{/* useFreeAddrInfo = */ false})
  839. {
  840. THolder<struct sockaddr_un, TFree> sockAddr(
  841. reinterpret_cast<struct sockaddr_un*>(malloc(sizeof(struct sockaddr_un))));
  842. Y_ENSURE(strlen(path) < sizeof(sockAddr->sun_path), "Unix socket path more than " << sizeof(sockAddr->sun_path));
  843. sockAddr->sun_family = AF_UNIX;
  844. strcpy(sockAddr->sun_path, path);
  845. TAddrInfoPtr hints(reinterpret_cast<struct addrinfo*>(malloc(sizeof(struct addrinfo))), TAddrInfoDeleter{/* useFreeAddrInfo = */ false});
  846. memset(hints.get(), 0, sizeof(*hints));
  847. hints->ai_flags = flags;
  848. hints->ai_family = AF_UNIX;
  849. hints->ai_socktype = SOCK_STREAM;
  850. hints->ai_addrlen = sizeof(*sockAddr);
  851. hints->ai_addr = (struct sockaddr*)sockAddr.Release();
  852. Info_.reset(hints.release());
  853. }
  854. inline struct addrinfo* Info() const noexcept {
  855. return Info_.get();
  856. }
  857. private:
  858. using TAddrInfoPtr = std::unique_ptr<struct addrinfo, TAddrInfoDeleter>;
  859. TAddrInfoPtr Info_;
  860. };
  861. TNetworkAddress::TNetworkAddress(const TUnixSocketPath& unixSocketPath, int flags)
  862. : Impl_(new TImpl(unixSocketPath.Path.data(), flags))
  863. {
  864. }
  865. TNetworkAddress::TNetworkAddress(const TString& host, ui16 port, int flags)
  866. : Impl_(new TImpl(host.data(), port, flags))
  867. {
  868. }
  869. TNetworkAddress::TNetworkAddress(const TString& host, ui16 port)
  870. : Impl_(new TImpl(host.data(), port, 0))
  871. {
  872. }
  873. TNetworkAddress::TNetworkAddress(ui16 port)
  874. : Impl_(new TImpl(nullptr, port, 0))
  875. {
  876. }
  877. TNetworkAddress::~TNetworkAddress() = default;
  878. struct addrinfo* TNetworkAddress::Info() const noexcept {
  879. return Impl_->Info();
  880. }
  881. TNetworkResolutionError::TNetworkResolutionError(int error) {
  882. const char* errMsg = nullptr;
  883. #ifdef _win_
  884. errMsg = LastSystemErrorText(error); // gai_strerror is not thread-safe on Windows
  885. #else
  886. errMsg = gai_strerror(error);
  887. #endif
  888. (*this) << errMsg << "(" << error;
  889. #if defined(_unix_)
  890. if (error == EAI_SYSTEM) {
  891. (*this) << "; errno=" << LastSystemError();
  892. }
  893. #endif
  894. (*this) << "): ";
  895. }
  896. #if defined(_unix_)
  897. static inline int GetFlags(int fd) {
  898. const int ret = fcntl(fd, F_GETFL);
  899. if (ret == -1) {
  900. ythrow TSystemError() << "can not get fd flags";
  901. }
  902. return ret;
  903. }
  904. static inline void SetFlags(int fd, int flags) {
  905. if (fcntl(fd, F_SETFL, flags) == -1) {
  906. ythrow TSystemError() << "can not set fd flags";
  907. }
  908. }
  909. static inline void EnableFlag(int fd, int flag) {
  910. const int oldf = GetFlags(fd);
  911. const int newf = oldf | flag;
  912. if (oldf != newf) {
  913. SetFlags(fd, newf);
  914. }
  915. }
  916. static inline void DisableFlag(int fd, int flag) {
  917. const int oldf = GetFlags(fd);
  918. const int newf = oldf & (~flag);
  919. if (oldf != newf) {
  920. SetFlags(fd, newf);
  921. }
  922. }
  923. static inline void SetFlag(int fd, int flag, bool value) {
  924. if (value) {
  925. EnableFlag(fd, flag);
  926. } else {
  927. DisableFlag(fd, flag);
  928. }
  929. }
  930. static inline bool FlagsAreEnabled(int fd, int flags) {
  931. return GetFlags(fd) & flags;
  932. }
  933. #endif
  934. #if defined(_win_)
  935. static inline void SetNonBlockSocket(SOCKET fd, int value) {
  936. unsigned long inbuf = value;
  937. unsigned long outbuf = 0;
  938. DWORD written = 0;
  939. if (!inbuf) {
  940. WSAEventSelect(fd, nullptr, 0);
  941. }
  942. if (WSAIoctl(fd, FIONBIO, &inbuf, sizeof(inbuf), &outbuf, sizeof(outbuf), &written, 0, 0) == SOCKET_ERROR) {
  943. ythrow TSystemError() << "can not set non block socket state";
  944. }
  945. }
  946. static inline bool IsNonBlockSocket(SOCKET fd) {
  947. unsigned long buf = 0;
  948. if (WSAIoctl(fd, FIONBIO, 0, 0, &buf, sizeof(buf), 0, 0, 0) == SOCKET_ERROR) {
  949. ythrow TSystemError() << "can not get non block socket state";
  950. }
  951. return buf;
  952. }
  953. #endif
  954. void SetNonBlock(SOCKET fd, bool value) {
  955. #if defined(_unix_)
  956. #if defined(FIONBIO)
  957. Y_UNUSED(SetFlag); // shut up clang about unused function
  958. int nb = value;
  959. if (ioctl(fd, FIONBIO, &nb) < 0) {
  960. ythrow TSystemError() << "ioctl failed";
  961. }
  962. #else
  963. SetFlag(fd, O_NONBLOCK, value);
  964. #endif
  965. #elif defined(_win_)
  966. SetNonBlockSocket(fd, value);
  967. #else
  968. #error todo
  969. #endif
  970. }
  971. bool IsNonBlock(SOCKET fd) {
  972. #if defined(_unix_)
  973. return FlagsAreEnabled(fd, O_NONBLOCK);
  974. #elif defined(_win_)
  975. return IsNonBlockSocket(fd);
  976. #else
  977. #error todo
  978. #endif
  979. }
  980. void SetDeferAccept(SOCKET s) {
  981. (void)s;
  982. #if defined(TCP_DEFER_ACCEPT)
  983. CheckedSetSockOpt(s, IPPROTO_TCP, TCP_DEFER_ACCEPT, 10, "defer accept");
  984. #endif
  985. #if defined(SO_ACCEPTFILTER)
  986. struct accept_filter_arg afa;
  987. Zero(afa);
  988. strcpy(afa.af_name, "dataready");
  989. SetSockOpt(s, SOL_SOCKET, SO_ACCEPTFILTER, afa);
  990. #endif
  991. }
  992. ssize_t PollD(struct pollfd fds[], nfds_t nfds, const TInstant& deadLine) noexcept {
  993. TInstant now = TInstant::Now();
  994. do {
  995. const TDuration toWait = PollStep(deadLine, now);
  996. const int res = poll(fds, nfds, MicroToMilli(toWait.MicroSeconds()));
  997. if (res > 0) {
  998. return res;
  999. }
  1000. if (res < 0) {
  1001. const int err = LastSystemError();
  1002. if (err != ETIMEDOUT && err != EINTR) {
  1003. return -err;
  1004. }
  1005. }
  1006. } while ((now = TInstant::Now()) < deadLine);
  1007. return -ETIMEDOUT;
  1008. }
  1009. void ShutDown(SOCKET s, int mode) {
  1010. if (shutdown(s, mode)) {
  1011. ythrow TSystemError() << "shutdown socket error";
  1012. }
  1013. }
  1014. extern "C" bool IsReusePortAvailable() {
  1015. // SO_REUSEPORT is always defined for linux builds, see SetReusePort() implementation above
  1016. #if defined(SO_REUSEPORT)
  1017. class TCtx {
  1018. public:
  1019. TCtx() {
  1020. TSocketHolder sock(::socket(AF_INET, SOCK_STREAM, 0));
  1021. const int e1 = errno;
  1022. if (sock == INVALID_SOCKET) {
  1023. ythrow TSystemError(e1) << "Cannot create AF_INET socket";
  1024. }
  1025. int val;
  1026. const int ret = GetSockOpt(sock, SOL_SOCKET, SO_REUSEPORT, val);
  1027. const int e2 = errno;
  1028. if (ret == 0) {
  1029. Flag_ = true;
  1030. } else {
  1031. if (e2 == ENOPROTOOPT) {
  1032. Flag_ = false;
  1033. } else {
  1034. ythrow TSystemError(e2) << "Unexpected error in getsockopt";
  1035. }
  1036. }
  1037. }
  1038. static inline const TCtx* Instance() noexcept {
  1039. return Singleton<TCtx>();
  1040. }
  1041. public:
  1042. bool Flag_;
  1043. };
  1044. return TCtx::Instance()->Flag_;
  1045. #else
  1046. return false;
  1047. #endif
  1048. }