>From c6ceb0d504f6da8fc5a49b84c9c6445a9497491e Mon Sep 17 00:00:00 2001 From: Robert Edmonds Date: Mon, 6 Jan 2014 21:07:20 -0500 Subject: [PATCH] Add so-reuseport option to enable SO_REUSEPORT on listening sockets on Linux 3.9+ --- daemon/daemon.c | 49 +++++++++++++++++++++++++++---- daemon/daemon.h | 6 ++-- daemon/remote.c | 2 +- doc/unbound.conf.5.in | 6 ++++ services/listen_dnsport.c | 73 ++++++++++++++++++++++++++++++++++------------ services/listen_dnsport.h | 7 +++-- services/outside_network.c | 4 +-- util/config_file.c | 3 ++ util/config_file.h | 2 ++ util/configlexer.lex | 1 + util/configparser.y | 15 ++++++++-- 11 files changed, 135 insertions(+), 33 deletions(-) diff --git a/daemon/daemon.c b/daemon/daemon.c index 2df2de1..c234eee 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -255,9 +255,35 @@ daemon_open_shared_ports(struct daemon* daemon) { log_assert(daemon); if(daemon->cfg->port != daemon->listening_port) { - listening_ports_free(daemon->ports); - if(!(daemon->ports=listening_ports_open(daemon->cfg))) + int i; +#if defined(__linux__) && defined(SO_REUSEPORT) + if(daemon->cfg->so_reuseport && daemon->cfg->num_threads > 0) + daemon->num_ports = daemon->cfg->num_threads; + else + daemon->num_ports = 1; +#else + daemon->num_ports = 1; +#endif + if(daemon->ports != NULL) { + for(i=0; i<daemon->num_ports; i++) + listening_ports_free(daemon->ports[i]); + free(daemon->ports); + daemon->ports = NULL; + } + if(!(daemon->ports = (struct listen_port**)calloc( + daemon->num_ports, sizeof(*daemon->ports)))) { return 0; + } + for(i=0; i<daemon->num_ports; i++) { + if(!(daemon->ports[i]= + listening_ports_open(daemon->cfg))) { + for(i=0; i<daemon->num_ports; i++) + listening_ports_free(daemon->ports[i]); + free(daemon->ports); + daemon->ports = NULL; + return 0; + } + } daemon->listening_port = daemon->cfg->port; } if(!daemon->cfg->remote_control_enable && daemon->rc_port) { @@ -394,6 +420,7 @@ static void* thread_start(void* arg) { struct worker* worker = (struct worker*)arg; + int 
port_num = 0; log_thread_set(&worker->thread_num); ub_thread_blocksigs(); #ifdef THREADS_DISABLED @@ -401,7 +428,14 @@ thread_start(void* arg) tube_close_write(worker->cmd); close_other_pipes(worker->daemon, worker->thread_num); #endif - if(!worker_init(worker, worker->daemon->cfg, worker->daemon->ports, 0)) +#if defined(__linux__) && defined(SO_REUSEPORT) + if(worker->daemon->cfg->so_reuseport) + port_num = worker->thread_num; + else + port_num = 0; +#endif + if(!worker_init(worker, worker->daemon->cfg, + worker->daemon->ports[port_num], 0)) fatal_exit("Could not initialize thread"); worker_work(worker); @@ -474,7 +508,7 @@ daemon_fork(struct daemon* daemon) #if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP) /* in libev the first inited base gets signals */ - if(!worker_init(daemon->workers[0], daemon->cfg, daemon->ports, 1)) + if(!worker_init(daemon->workers[0], daemon->cfg, daemon->ports[0], 1)) fatal_exit("Could not initialize main thread"); #endif @@ -488,7 +522,7 @@ daemon_fork(struct daemon* daemon) */ #if !(defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)) /* libevent has the last inited base get signals (or any base) */ - if(!worker_init(daemon->workers[0], daemon->cfg, daemon->ports, 1)) + if(!worker_init(daemon->workers[0], daemon->cfg, daemon->ports[0], 1)) fatal_exit("Could not initialize main thread"); #endif signal_handling_playback(daemon->workers[0]); @@ -534,11 +568,14 @@ daemon_cleanup(struct daemon* daemon) void daemon_delete(struct daemon* daemon) { + int i; if(!daemon) return; modstack_desetup(&daemon->mods, daemon->env); daemon_remote_delete(daemon->rc); - listening_ports_free(daemon->ports); + for(i = 0; i < daemon->num_ports; i++) + listening_ports_free(daemon->ports[i]); + free(daemon->ports); listening_ports_free(daemon->rc_ports); if(daemon->env) { slabhash_delete(daemon->env->msg_cache); diff --git a/daemon/daemon.h b/daemon/daemon.h index 8e47ea0..34d62b8 100644 --- a/daemon/daemon.h +++ b/daemon/daemon.h @@ -72,8 +72,10 
@@ struct daemon { char* pidfile; /** port number that has ports opened. */ int listening_port; - /** listening ports, opened, to be shared by threads */ - struct listen_port* ports; + /** array of listening ports, opened */ + struct listen_port** ports; + /** size of ports array */ + int num_ports; /** port number for remote that has ports opened. */ int rc_port; /** listening ports for remote control */ diff --git a/daemon/remote.c b/daemon/remote.c index 404ff5f..78acac0 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -271,7 +271,7 @@ add_open(const char* ip, int nr, struct listen_port** list, int noproto_is_err) } /* open fd */ - fd = create_tcp_accept_sock(res, 1, &noproto); + fd = create_tcp_accept_sock(res, 1, &noproto, 0); freeaddrinfo(res); if(fd == -1 && noproto) { if(!noproto_is_err) diff --git a/doc/unbound.conf.5.in b/doc/unbound.conf.5.in index ac06056..d4ae222 100644 --- a/doc/unbound.conf.5.in +++ b/doc/unbound.conf.5.in @@ -250,6 +250,12 @@ linux unbound needs root permission to bypass the limit, or the admin can use sysctl net.core.wmem_max. On BSD, Solaris changes are similar to so\-rcvbuf. .TP +.B so\-reuseport: \fI<yes or no> +If yes, then open dedicated listening sockets for incoming queries for each +thread and try to set the SO_REUSEPORT socket option on each socket. May +distribute incoming queries to threads more evenly. Default is no. Only +supported on Linux >= 3.9. +.TP .B rrset\-cache\-size: \fI<number> Number of bytes size of the RRset cache. Default is 4 megabytes. 
A plain number is in bytes, append 'k', 'm' or 'g' for kilobytes, megabytes diff --git a/services/listen_dnsport.c b/services/listen_dnsport.c index f37c3fc..cd9bce8 100644 --- a/services/listen_dnsport.c +++ b/services/listen_dnsport.c @@ -92,10 +92,10 @@ verbose_print_addr(struct addrinfo *addr) int create_udp_sock(int family, int socktype, struct sockaddr* addr, socklen_t addrlen, int v6only, int* inuse, int* noproto, - int rcv, int snd, int listen) + int rcv, int snd, int listen, int reuseport) { int s; -#if defined(SO_REUSEADDR) || defined(IPV6_USE_MIN_MTU) +#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_USE_MIN_MTU) int on=1; #endif #ifdef IPV6_MTU @@ -148,6 +148,23 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } #endif /* SO_REUSEADDR */ +#if defined(__linux__) && defined(SO_REUSEPORT) + /* Linux specific: try to set SO_REUSEPORT so that incoming + * queries are distributed evenly among the receiving threads. + * Each thread must have its own socket bound to the same port, + * with SO_REUSEPORT set on each socket. + */ + if (reuseport && + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, + (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(.. SO_REUSEPORT ..) 
failed: %s", + strerror(errno)); + close(s); + *noproto = 0; + *inuse = 0; + return -1; + } +#endif /* defined(__linux__) && defined(SO_REUSEPORT) */ } if(rcv) { #ifdef SO_RCVBUF @@ -411,10 +428,11 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, } int -create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto) +create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, + int reuseport) { int s; -#if defined(SO_REUSEADDR) || defined(IPV6_V6ONLY) +#if defined(SO_REUSEADDR) || defined(SO_REUSEPORT) || defined(IPV6_V6ONLY) int on = 1; #endif /* SO_REUSEADDR || IPV6_V6ONLY */ verbose_print_addr(addr); @@ -452,6 +470,20 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto) return -1; } #endif /* SO_REUSEADDR */ +#if defined(__linux__) && defined(SO_REUSEPORT) + /* Linux specific: try to set SO_REUSEPORT so that incoming + * connections are distributed evenly among the receiving threads. + * Each thread must have its own socket bound to the same port, + * with SO_REUSEPORT set on each socket. + */ + if (reuseport && setsockopt(s, SOL_SOCKET, SO_REUSEPORT, (void*)&on, + (socklen_t)sizeof(on)) < 0) { + log_err("setsockopt(.. SO_REUSEPORT ..) 
failed: %s", + strerror(errno)); + close(s); + return -1; + } +#endif /* defined(__linux__) && defined(SO_REUSEPORT) */ #if defined(IPV6_V6ONLY) if(addr->ai_family == AF_INET6 && v6only) { if(setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, @@ -519,7 +551,8 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto) */ static int make_sock(int stype, const char* ifname, const char* port, - struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd) + struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, + int reuseport) { struct addrinfo *res = NULL; int r, s, inuse, noproto; @@ -546,14 +579,15 @@ make_sock(int stype, const char* ifname, const char* port, verbose_print_addr(res); s = create_udp_sock(res->ai_family, res->ai_socktype, (struct sockaddr*)res->ai_addr, res->ai_addrlen, - v6only, &inuse, &noproto, (int)rcv, (int)snd, 1); + v6only, &inuse, &noproto, (int)rcv, (int)snd, 1, + reuseport); if(s == -1 && inuse) { log_err("bind: address already in use"); } else if(s == -1 && noproto && hints->ai_family == AF_INET6){ *noip6 = 1; } } else { - s = create_tcp_accept_sock(res, v6only, &noproto); + s = create_tcp_accept_sock(res, v6only, &noproto, reuseport); if(s == -1 && noproto && hints->ai_family == AF_INET6){ *noip6 = 1; } @@ -565,7 +599,8 @@ make_sock(int stype, const char* ifname, const char* port, /** make socket and first see if ifname contains port override info */ static int make_sock_port(int stype, const char* ifname, const char* port, - struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd) + struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, + int reuseport) { char* s = strchr(ifname, '@'); if(s) { @@ -587,9 +622,10 @@ make_sock_port(int stype, const char* ifname, const char* port, strlcpy(p, s+1, sizeof(p)); p[strlen(s+1)]=0; return make_sock(stype, newif, p, hints, v6only, noip6, - rcv, snd); + rcv, snd, reuseport); } - return make_sock(stype, ifname, port, hints, v6only, 
noip6, rcv, snd); + return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, + reuseport); } /** @@ -681,19 +717,20 @@ set_recvpktinfo(int s, int family) * @param rcv: receive buffer size for UDP * @param snd: send buffer size for UDP * @param ssl_port: ssl service port number + * @param reuseport: try to set SO_REUSEPORT. * @return: returns false on error. */ static int ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, struct addrinfo *hints, const char* port, struct listen_port** list, - size_t rcv, size_t snd, int ssl_port) + size_t rcv, size_t snd, int ssl_port, int reuseport) { int s, noip6=0; if(!do_udp && !do_tcp) return 0; if(do_auto) { if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, - &noip6, rcv, snd)) == -1) { + &noip6, rcv, snd, reuseport)) == -1) { if(noip6) { log_warn("IPv6 protocol not available"); return 1; @@ -720,7 +757,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } else if(do_udp) { /* regular udp socket */ if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, - &noip6, rcv, snd)) == -1) { + &noip6, rcv, snd, reuseport)) == -1) { if(noip6) { log_warn("IPv6 protocol not available"); return 1; @@ -741,7 +778,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, atoi(strchr(ifname, '@')+1) == ssl_port) || (!strchr(ifname, '@') && atoi(port) == ssl_port)); if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, - &noip6, 0, 0)) == -1) { + &noip6, 0, 0, reuseport)) == -1) { if(noip6) { /*log_warn("IPv6 protocol not available");*/ return 1; @@ -896,7 +933,7 @@ listening_ports_open(struct config_file* cfg) do_auto, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, - cfg->ssl_port)) { + cfg->ssl_port, cfg->so_reuseport)) { listening_ports_free(list); return NULL; } @@ -907,7 +944,7 @@ listening_ports_open(struct config_file* cfg) do_auto, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, - 
cfg->ssl_port)) { + cfg->ssl_port, cfg->so_reuseport)) { listening_ports_free(list); return NULL; } @@ -920,7 +957,7 @@ listening_ports_open(struct config_file* cfg) if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, - cfg->ssl_port)) { + cfg->ssl_port, cfg->so_reuseport)) { listening_ports_free(list); return NULL; } @@ -931,7 +968,7 @@ listening_ports_open(struct config_file* cfg) if(!ports_create_if(cfg->ifs[i], 0, cfg->do_udp, do_tcp, &hints, portbuf, &list, cfg->so_rcvbuf, cfg->so_sndbuf, - cfg->ssl_port)) { + cfg->ssl_port, cfg->so_reuseport)) { listening_ports_free(list); return NULL; } diff --git a/services/listen_dnsport.h b/services/listen_dnsport.h index 28274c9..fee3554 100644 --- a/services/listen_dnsport.h +++ b/services/listen_dnsport.h @@ -181,19 +181,22 @@ void listen_start_accept(struct listen_dnsport* listen); * @param snd: set size on sndbuf with socket option, if 0 it is not set. * @param listen: if true, this is a listening UDP port, eg port 53, and * set SO_REUSEADDR on it. + * @param reuseport: if true, try to set SO_REUSEPORT on listening UDP port. * @return: the socket. -1 on error. */ int create_udp_sock(int family, int socktype, struct sockaddr* addr, socklen_t addrlen, int v6only, int* inuse, int* noproto, int rcv, - int snd, int listen); + int snd, int listen, int reuseport); /** * Create and bind TCP listening socket * @param addr: address info ready to make socket. * @param v6only: enable ip6 only flag on ip6 sockets. * @param noproto: if error caused by lack of protocol support. + * @param reuseport: if true, try to set SO_REUSEPORT. * @return: the socket. -1 on error. 
*/ -int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto); +int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto, + int reuseport); #endif /* LISTEN_DNSPORT_H */ diff --git a/services/outside_network.c b/services/outside_network.c index ebb3515..21a6eff 100644 --- a/services/outside_network.c +++ b/services/outside_network.c @@ -849,13 +849,13 @@ udp_sockport(struct sockaddr_storage* addr, socklen_t addrlen, int port, sa->sin6_port = (in_port_t)htons((uint16_t)port); fd = create_udp_sock(AF_INET6, SOCK_DGRAM, (struct sockaddr*)addr, addrlen, 1, inuse, &noproto, - 0, 0, 0); + 0, 0, 0, 0); } else { struct sockaddr_in* sa = (struct sockaddr_in*)addr; sa->sin_port = (in_port_t)htons((uint16_t)port); fd = create_udp_sock(AF_INET, SOCK_DGRAM, (struct sockaddr*)addr, addrlen, 1, inuse, &noproto, - 0, 0, 0); + 0, 0, 0, 0); } return fd; } diff --git a/util/config_file.c b/util/config_file.c index aa80f92..931605f 100644 --- a/util/config_file.c +++ b/util/config_file.c @@ -156,6 +156,7 @@ config_create(void) cfg->if_automatic = 0; cfg->so_rcvbuf = 0; cfg->so_sndbuf = 0; + cfg->so_reuseport = 0; cfg->num_ifs = 0; cfg->ifs = NULL; cfg->num_out_ifs = 0; @@ -365,6 +366,7 @@ int config_set_option(struct config_file* cfg, const char* opt, else S_SIZET_OR_ZERO("jostle-timeout:", jostle_time) else S_MEMSIZE("so-rcvbuf:", so_rcvbuf) else S_MEMSIZE("so-sndbuf:", so_sndbuf) + else S_YNO("so-reuseport:", so_reuseport) else S_MEMSIZE("rrset-cache-size:", rrset_cache_size) else S_POW2("rrset-cache-slabs:", rrset_cache_slabs) else S_YNO("prefetch:", prefetch) @@ -610,6 +612,7 @@ config_get_option(struct config_file* cfg, const char* opt, else O_UNS(opt, "jostle-timeout", jostle_time) else O_MEM(opt, "so-rcvbuf", so_rcvbuf) else O_MEM(opt, "so-sndbuf", so_sndbuf) + else O_YNO(opt, "so-reuseport", so_reuseport) else O_MEM(opt, "rrset-cache-size", rrset_cache_size) else O_DEC(opt, "rrset-cache-slabs", rrset_cache_slabs) else O_YNO(opt, 
"prefetch-key", prefetch_key) diff --git a/util/config_file.h b/util/config_file.h index 6161302..afc2ec9 100644 --- a/util/config_file.h +++ b/util/config_file.h @@ -130,6 +130,8 @@ struct config_file { size_t so_rcvbuf; /** SO_SNDBUF size to set on port 53 UDP socket */ size_t so_sndbuf; + /** SO_REUSEPORT requested on port 53 sockets */ + int so_reuseport; /** number of interfaces to open. If 0 default all interfaces. */ int num_ifs; diff --git a/util/configlexer.lex b/util/configlexer.lex index 0b3738f..5e625c8 100644 --- a/util/configlexer.lex +++ b/util/configlexer.lex @@ -226,6 +226,7 @@ outgoing-interface{COLON} { YDVAR(1, VAR_OUTGOING_INTERFACE) } interface-automatic{COLON} { YDVAR(1, VAR_INTERFACE_AUTOMATIC) } so-rcvbuf{COLON} { YDVAR(1, VAR_SO_RCVBUF) } so-sndbuf{COLON} { YDVAR(1, VAR_SO_SNDBUF) } +so-reuseport{COLON} { YDVAR(1, VAR_SO_REUSEPORT) } chroot{COLON} { YDVAR(1, VAR_CHROOT) } username{COLON} { YDVAR(1, VAR_USERNAME) } directory{COLON} { YDVAR(1, VAR_DIRECTORY) } diff --git a/util/configparser.y b/util/configparser.y index b11d7d4..5b69d66 100644 --- a/util/configparser.y +++ b/util/configparser.y @@ -101,7 +101,7 @@ extern struct config_parser_state* cfg_parser; %token VAR_VAL_SIG_SKEW_MAX VAR_CACHE_MIN_TTL VAR_VAL_LOG_LEVEL %token VAR_AUTO_TRUST_ANCHOR_FILE VAR_KEEP_MISSING VAR_ADD_HOLDDOWN %token VAR_DEL_HOLDDOWN VAR_SO_RCVBUF VAR_EDNS_BUFFER_SIZE VAR_PREFETCH -%token VAR_PREFETCH_KEY VAR_SO_SNDBUF VAR_HARDEN_BELOW_NXDOMAIN +%token VAR_PREFETCH_KEY VAR_SO_SNDBUF VAR_SO_REUSEPORT VAR_HARDEN_BELOW_NXDOMAIN %token VAR_IGNORE_CD_FLAG VAR_LOG_QUERIES VAR_TCP_UPSTREAM VAR_SSL_UPSTREAM %token VAR_SSL_SERVICE_KEY VAR_SSL_SERVICE_PEM VAR_SSL_PORT VAR_FORWARD_FIRST %token VAR_STUB_FIRST VAR_MINIMAL_RESPONSES VAR_RRSET_ROUNDROBIN @@ -162,7 +162,8 @@ content_server: server_num_threads | server_verbosity | server_port | server_so_sndbuf | server_harden_below_nxdomain | server_ignore_cd_flag | server_log_queries | server_tcp_upstream | server_ssl_upstream 
| server_ssl_service_key | server_ssl_service_pem | server_ssl_port | - server_minimal_responses | server_rrset_roundrobin | server_max_udp_size + server_minimal_responses | server_rrset_roundrobin | server_max_udp_size | + server_so_reuseport ; stubstart: VAR_STUB_ZONE { @@ -595,6 +596,16 @@ server_so_sndbuf: VAR_SO_SNDBUF STRING_ARG free($2); } ; +server_so_reuseport: VAR_SO_REUSEPORT STRING_ARG + { + OUTYY(("P(server_so_reuseport:%s)\n", $2)); + if(strcmp($2, "yes") != 0 && strcmp($2, "no") != 0) + yyerror("expected yes or no."); + else cfg_parser->cfg->so_reuseport = + (strcmp($2, "yes")==0); + free($2); + } + ; server_edns_buffer_size: VAR_EDNS_BUFFER_SIZE STRING_ARG { OUTYY(("P(server_edns_buffer_size:%s)\n", $2)); -- 1.8.5.2