Browse Source

Allow for a longer return if port is in use when server starts up.

Brian Aker 12 years ago
parent
commit
2ab50fe1be
9 changed files with 150 additions and 111 deletions
  1. 3 0
      ChangeLog
  2. 1 1
      configure.ac
  3. 115 92
      libgearman-server/gearmand.cc
  4. 2 0
      libgearman-server/io.cc
  5. 0 1
      libtest/cmdline.cc
  6. 3 3
      libtest/cmdline.h
  7. 5 8
      libtest/server.cc
  8. 5 5
      libtest/unittest.cc
  9. 16 1
      tests/cycle.cc

+ 3 - 0
ChangeLog

@@ -1,3 +1,6 @@
+1.0.3 
+* Allow for a longer set of retries if port is in use when the server starts up.
+
 1.0.2 Mon Dec 17 07:24:42 EST 2012
 * Add --expceptions to Gearmand to enable exceptions by default
 * Backtraces on some types of crashes.

+ 1 - 1
configure.ac

@@ -7,7 +7,7 @@
 # the COPYING file in this directory for full text.
 
 AC_PREREQ([2.63])
-AC_INIT([gearmand],[1.0.2],[https://bugs.launchpad.net/gearmand],[gearmand],[http://gearman.info/])
+AC_INIT([gearmand],[1.0.3],[https://bugs.launchpad.net/gearmand],[gearmand],[http://gearman.info/])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_MACRO_DIR([m4])
 

+ 115 - 92
libgearman-server/gearmand.cc

@@ -447,6 +447,86 @@ void gearmand_wakeup(gearmand_st *gearmand, gearmand_wakeup_t wakeup)
  * Private definitions
  */
 
+gearmand_error_t set_socket(int& fd, struct addrinfo *addrinfo_next)
+{
+  /* Call to socket() can fail for some getaddrinfo results, try another. */
+  fd= socket(addrinfo_next->ai_family, addrinfo_next->ai_socktype,
+             addrinfo_next->ai_protocol);
+  if (fd == -1)
+  {
+    return gearmand_perror("socket()");
+  }
+
+#ifdef IPV6_V6ONLY
+  {
+    int flags= 1;
+    if (addrinfo_next->ai_family == AF_INET6)
+    {
+      flags= 1;
+      if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &flags, sizeof(flags)) == -1)
+      {
+        return gearmand_perror("setsockopt(IPV6_V6ONLY)");
+      }
+    }
+  }
+#endif
+
+  {
+    if (FD_CLOEXEC)
+    {
+      int flags;
+      do 
+      {
+        flags= fcntl(fd, F_GETFD, 0);
+      } while (flags == -1 and (errno == EINTR or errno == EAGAIN));
+
+      if (flags != -1)
+      {
+        int rval;
+        do
+        { 
+          rval= fcntl (fd, F_SETFD, flags | FD_CLOEXEC);
+        } while (rval == -1 && (errno == EINTR or errno == EAGAIN));
+        // we currently ignore the case where rval is -1
+      }
+    }
+  }
+
+  {
+    int flags= 1;
+    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof(flags)) == -1)
+    {
+      return gearmand_perror("setsockopt(SO_REUSEADDR)");
+    }
+  }
+
+  {
+    int flags= 1;
+    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &flags, sizeof(flags)) == -1)
+    {
+      return gearmand_perror("setsockopt(SO_KEEPALIVE)");
+    }
+  }
+
+  {
+    struct linger ling= {0, 0};
+    if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling)) == -1)
+    {
+      return gearmand_perror("setsockopt(SO_LINGER)");
+    }
+  }
+
+  {
+    int flags= 1;
+    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof(flags)) == -1)
+    {
+      return gearmand_perror("setsockopt(TCP_NODELAY)");
+    }
+  }
+
+  return GEARMAN_SUCCESS;
+}
+
 static const uint32_t bind_timeout= 20; // Number is not special, but look at INFO messages if you decide to change it.
 
 typedef std::pair<std::string, std::string> host_port_t;
@@ -455,7 +535,6 @@ static gearmand_error_t _listen_init(gearmand_st *gearmand)
 {
   for (uint32_t x= 0; x < gearmand->port_count; x++)
   {
-    struct linger ling= {0, 0};
     struct addrinfo hints;
     struct addrinfo *addrinfo;
 
@@ -476,8 +555,7 @@ static gearmand_error_t _listen_init(gearmand_st *gearmand)
         {
           buffer[0]= 0;
         }
-        gearmand_gai_error(buffer, ret);
-        return GEARMAN_ERRNO;
+        return gearmand_gai_error(buffer, ret);
       }
     }
 
@@ -485,7 +563,6 @@ static gearmand_error_t _listen_init(gearmand_st *gearmand)
     for (struct addrinfo *addrinfo_next= addrinfo; addrinfo_next != NULL;
          addrinfo_next= addrinfo_next->ai_next)
     {
-      int fd;
       char host[NI_MAXHOST];
 
       {
@@ -512,69 +589,6 @@ static gearmand_error_t _listen_init(gearmand_st *gearmand)
 
       gearmand_log_debug(GEARMAN_DEFAULT_LOG_PARAM, "Trying to listen on %s:%s", host, port->port);
 
-      /* Call to socket() can fail for some getaddrinfo results, try another. */
-      fd= socket(addrinfo_next->ai_family, addrinfo_next->ai_socktype,
-                 addrinfo_next->ai_protocol);
-      if (fd == -1)
-      {
-        gearmand_log_error(GEARMAN_DEFAULT_LOG_PARAM, "Failed to listen on %s:%s", host, port->port);
-        continue;
-      }
-
-      int flags= 1;
-#ifdef IPV6_V6ONLY
-      if (addrinfo_next->ai_family == AF_INET6)
-      {
-        flags= 1;
-        if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &flags, sizeof(flags)) == -1)
-        {
-          gearmand_perror("setsockopt(IPV6_V6ONLY)");
-          return GEARMAN_ERRNO;
-        }
-      }
-#endif
-
-      {
-        int ret= fcntl(fd, F_SETFD, FD_CLOEXEC);
-        if (ret != 0 || !(fcntl(fd, F_GETFD, 0) & FD_CLOEXEC))
-        {
-          gearmand_perror("fcntl(FD_CLOEXEC)");
-          return GEARMAN_ERRNO;
-        }
-      }
-
-      {
-        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof(flags)) == -1)
-        {
-          gearmand_perror("setsockopt(SO_REUSEADDR)");
-          return GEARMAN_ERRNO;
-        }
-      }
-
-      {
-        if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &flags, sizeof(flags)) == -1)
-        {
-          gearmand_perror("setsockopt(SO_KEEPALIVE)");
-          return GEARMAN_ERRNO;
-        }
-      }
-
-      {
-        if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling)) == -1)
-        {
-          gearmand_perror("setsockopt(SO_LINGER)");
-          return GEARMAN_ERRNO;
-        }
-      }
-
-      {
-        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof(flags)) == -1)
-        {
-          gearmand_perror("setsockopt(TCP_NODELAY)");
-          return GEARMAN_ERRNO;
-        }
-      }
-
       /*
         @note logic for this pulled from Drizzle.
 
@@ -589,40 +603,49 @@ static gearmand_error_t _listen_init(gearmand_st *gearmand)
       uint32_t this_wait;
       uint32_t retry;
       int ret= -1;
-      for (waited= 0, retry= 4; ; retry++, waited+= this_wait)
+      int fd;
+      for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
       {
-        if (((ret= bind(fd, addrinfo_next->ai_addr, addrinfo_next->ai_addrlen)) == 0) or
-            (errno != EADDRINUSE) || (waited >= bind_timeout))
+        gearmand_debug("set_socket");
+        { 
+          gearmand_error_t socket_ret;
+          if (gearmand_failed(socket_ret= set_socket(fd, addrinfo_next)))
+          {
+            gearmand_sockfd_close(fd);
+            return socket_ret;
+          }
+        }
+
+        errno= 0;
+        if ((ret= bind(fd, addrinfo_next->ai_addr, addrinfo_next->ai_addrlen)) == 0)
         {
+          // Success
           break;
         }
-
-        // We are in single user threads, so strerror() is fine.
-        gearmand_log_debug(GEARMAN_DEFAULT_LOG_PARAM, "Retrying bind(%s) on %s:%s %u >= %u", 
-                           strerror(errno), host, port->port,
-                           waited, bind_timeout);
-        this_wait= retry * retry / 3 + 1;
-        sleep(this_wait);
-      }
-
-      if (ret < 0)
-      {
-        gearmand_perror("bind");
-
+        // Protect our error
+        ret= errno;
         gearmand_sockfd_close(fd);
-
-        if (errno == EADDRINUSE)
+        
+        if (waited >= bind_timeout)
         {
-          if (port->listen_fd == NULL)
-          {
-            gearmand_log_error(GEARMAN_DEFAULT_LOG_PARAM, "Address already in use %s:%s", host, port->port);
-          }
+          return gearmand_log_error(GEARMAN_DEFAULT_LOG_PARAM, "Timeout occured when calling bind() for %s:%s", host, port->port);
+        }
 
-          continue;
+        if (ret != EADDRINUSE)
+        {
+          errno= ret;
+          return gearmand_perror("bind");
         }
 
-        gearmand_perror("bind");
-        return GEARMAN_ERRNO;
+        this_wait= retry * retry / 3 + 1;
+
+        // We are in single user threads, so strerror() is fine.
+        gearmand_log_debug(GEARMAN_DEFAULT_LOG_PARAM, "Retrying bind(%s) on %s:%s %u + %u >= %u", 
+                           strerror(ret), host, port->port,
+                           waited, this_wait, bind_timeout);
+
+        struct timespec requested= { this_wait, 0 };
+        nanosleep(&requested, NULL);
       }
 
       if (listen(fd, gearmand->backlog) == -1)

+ 2 - 0
libgearman-server/io.cc

@@ -874,7 +874,9 @@ static gearmand_error_t _io_setsockopt(gearmand_io_st &connection)
 void gearmand_sockfd_close(int sockfd)
 {
   if (sockfd == INVALID_SOCKET)
+  {
     return;
+  }
 
   /* in case of death shutdown to avoid blocking at close() */
   if (shutdown(sockfd, SHUT_RDWR) == SOCKET_ERROR && get_socket_errno() != ENOTCONN)

+ 0 - 1
libtest/cmdline.cc

@@ -409,7 +409,6 @@ bool Application::slurp()
 Application::error_t Application::join()
 {
   pid_t waited_pid= waitpid(_pid, &_status, 0);
-
   if (waited_pid == _pid and WIFEXITED(_status) == false)
   {
     /*

+ 3 - 3
libtest/cmdline.h

@@ -162,7 +162,7 @@ public:
 
   std::string print();
 
-  void use_valgrind(bool arg= true)
+  void use_valgrind(bool arg)
   {
     _use_valgrind= arg;
   }
@@ -172,12 +172,12 @@ public:
   bool slurp();
   void murder();
 
-  void use_gdb(bool arg= true)
+  void use_gdb(bool arg)
   {
     _use_gdb= arg;
   }
 
-  void use_ptrcheck(bool arg= true)
+  void use_ptrcheck(bool arg)
   {
     _use_ptrcheck= arg;
   }

+ 5 - 8
libtest/server.cc

@@ -189,13 +189,10 @@ bool Server::start()
 #endif
   }
 
-  // This needs more work.
-#if 0
-  if (gdb_is_caller())
+  if (getenv("YATL_GDB_SERVER"))
   {
-    _app.use_gdb();
+    _app.use_gdb(true);
   }
-#endif
 
   if (port() == LIBTEST_FAIL_PORT)
   {
@@ -205,11 +202,11 @@ bool Server::start()
 
   if (getenv("YATL_PTRCHECK_SERVER"))
   {
-    _app.use_ptrcheck();
+    _app.use_ptrcheck(true);
   }
   else if (getenv("YATL_VALGRIND_SERVER"))
   {
-    _app.use_valgrind();
+    _app.use_valgrind(true);
   }
 
   out_of_ban_killed(false);
@@ -275,7 +272,7 @@ bool Server::start()
     uint32_t waited;
     uint32_t retry;
 
-    for (waited= 0, retry= 7; ; retry++, waited+= this_wait)
+    for (waited= 0, retry= 1; ; retry++, waited+= this_wait)
     {
       if (_app.check() == false)
       {

+ 5 - 5
libtest/unittest.cc

@@ -464,7 +464,7 @@ static test_return_t application_gdb_true_BINARY2(void *)
   test_skip(0, access("/usr/bin/true", X_OK ));
 
   Application true_app("/usr/bin/true");
-  true_app.use_gdb();
+  true_app.use_gdb(true);
 
   test_compare(Application::SUCCESS, true_app.run());
   test_compare(Application::SUCCESS, true_app.join());
@@ -478,7 +478,7 @@ static test_return_t application_gdb_true_BINARY(void *)
   test_skip(0, access("/usr/bin/true", X_OK ));
 
   Application true_app("/usr/bin/true");
-  true_app.use_gdb();
+  true_app.use_gdb(true);
 
   const char *args[]= { "--fubar", 0 };
   test_compare(Application::SUCCESS, true_app.run(args));
@@ -685,7 +685,7 @@ static test_return_t wait_services_appliction_TEST(void *)
   test_skip(0, access("libtest/wait", X_OK ));
 
   libtest::Application wait_app("libtest/wait", true);
-  wait_app.use_gdb();
+  wait_app.use_gdb(true);
 
   const char *args[]= { "/etc/services", 0 };
   test_compare(Application::SUCCESS, wait_app.run(args));
@@ -706,7 +706,7 @@ static test_return_t gdb_wait_services_appliction_TEST(void *)
   test_skip(0, access("libtest/wait", X_OK ));
 
   libtest::Application wait_app("libtest/wait", true);
-  wait_app.use_gdb();
+  wait_app.use_gdb(true);
 
   const char *args[]= { "/etc/services", 0 };
   test_compare(Application::SUCCESS, wait_app.run(args));
@@ -726,7 +726,7 @@ static test_return_t gdb_abort_services_appliction_TEST(void *)
 #endif
 
   libtest::Application abort_app("libtest/abort", true);
-  abort_app.use_gdb();
+  abort_app.use_gdb(true);
 
   test_compare(Application::SUCCESS, abort_app.run());
   test_compare(Application::SUCCESS, abort_app.join());

+ 16 - 1
tests/cycle.cc

@@ -114,6 +114,21 @@ static test_return_t server_startup_multiple_TEST(void *obj)
   return TEST_SUCCESS;
 }
 
+// We can't really test for this just yet, because we don't know if the server
+// we attach to is really the one we expect to attach too.
+static test_return_t server_startup_conflict_TEST(void*)
+{
+#if 0
+  cycle_context_st *context= (cycle_context_st*)object;
+
+  in_port_t bind_port= libtest::get_free_port();
+  test_compare(true, server_startup(context->servers, "gearmand", bind_port, 0, NULL, false));
+  test_compare(false, server_startup(context->servers, "gearmand", bind_port, 0, NULL, false));
+#endif
+
+  return TEST_SUCCESS;
+}
+
 static test_return_t shutdown_and_remove_TEST(void *obj)
 {
   cycle_context_st *context= (cycle_context_st*)obj;
@@ -127,6 +142,7 @@ test_st server_startup_TESTS[] ={
   {"server_startup(many)", false, (test_callback_fn*)server_startup_multiple_TEST },
   {"shutdown_and_remove()", false, (test_callback_fn*)shutdown_and_remove_TEST },
   {"server_startup(many)", false, (test_callback_fn*)server_startup_multiple_TEST },
+  {"server_startup() with bind() conflict", false, (test_callback_fn*)server_startup_conflict_TEST },
   {0, 0, 0}
 };
 
@@ -201,4 +217,3 @@ void get_world(libtest::Framework *world)
   world->create(world_create);
   world->destroy(world_destroy);
 }
-