Skip to content

Commit cf45f2c

Browse files
Use SO_LINGER with timeout 0 to avoid thousands of server sockets in TIME_WAIT when using small reconnect intervals (#260)
* Use SO_LINGER with timeout 0 to avoid thousands of server sockets in TIME_WAIT when using small reconnect intervals
* Added test for small reconnect interval (1 command)
* CI adjustments for TCP connection limit and reuse
* Skipping reconnect interval test on cluster, given it's not supported
1 parent c7d2b57 commit cf45f2c

File tree

3 files changed

+49
-6
lines changed

3 files changed

+49
-6
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ jobs:
4646
sudo apt-get install redis
4747
sudo service redis-server stop
4848
49+
- name: Increase connection limit
50+
run: |
51+
sudo sysctl -w net.ipv4.tcp_fin_timeout=10
52+
sudo sysctl -w net.ipv4.tcp_tw_reuse=1
53+
ulimit -n 40960
54+
4955
- name: Generate TLS test certificates
5056
if: matrix.platform == 'ubuntu-latest'
5157
run: |

shard_connection.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,18 @@ int shard_connection::setup_socket(struct connect_info* addr) {
231231
return -1;
232232
}
233233

234-
// configure socket behavior
235-
struct linger ling = {0, 0};
236-
int flags = 1;
234+
237235
int error = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, (void *) &flags, sizeof(flags));
238236
assert(error == 0);
239237

238+
/*
239+
* Configure socket behavior:
240+
* If l_onoff is non-zero and l_linger is zero:
241+
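* The socket will discard any unsent data and the close() call will return immediately; the connection is reset (RST) instead of being closed gracefully, so the closed socket does not linger in TIME_WAIT.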
242+
*/
243+
struct linger ling;
244+
ling.l_onoff = 1; // Enable SO_LINGER
245+
ling.l_linger = 0; // Discard any unsent data and close immediately
240246
error = setsockopt(sockfd, SOL_SOCKET, SO_LINGER, (void *) &ling, sizeof(ling));
241247
assert(error == 0);
242248

@@ -264,7 +270,7 @@ int shard_connection::connect(struct connect_info* addr) {
264270
// setup socket
265271
int sockfd = setup_socket(addr);
266272
if (sockfd < 0) {
267-
fprintf(stderr, "Failed to setup socket: %s", strerror(errno));
273+
fprintf(stderr, "Failed to setup socket: %s\n", strerror(errno));
268274
return -1;
269275
}
270276

@@ -478,8 +484,10 @@ void shard_connection::process_response(void)
478484
// client manage connection & disconnection of shard
479485
m_conns_manager->disconnect();
480486
ret = m_conns_manager->connect();
481-
assert(ret == 0);
482-
487+
if (ret != 0) {
488+
benchmark_error_log("failed to reconnect.\n");
489+
exit(1);
490+
}
483491
return;
484492
}
485493
}

tests/tests_oss_simple_flow.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,35 @@ def test_default_set_get_1_1(env):
350350
# assert same number of gets and sets
351351
env.assertEqual(merged_command_stats['cmdstat_set']['calls'], merged_command_stats['cmdstat_get']['calls'])
352352

353+
# run each test on different env
354+
def test_short_reconnect_interval(env):
355+
# cluster mode does not support the reconnect-interval option
356+
env.skipOnCluster()
357+
benchmark_specs = {"name": env.testName, "args": ['--reconnect-interval=1']}
358+
addTLSArgs(benchmark_specs, env)
359+
config = get_default_memtier_config()
360+
master_nodes_list = env.getMasterNodesList()
361+
overall_expected_request_count = get_expected_request_count(config)
362+
363+
add_required_env_arguments(benchmark_specs, config, env, master_nodes_list)
364+
365+
# Create a temporary directory
366+
test_dir = tempfile.mkdtemp()
367+
368+
config = RunConfig(test_dir, env.testName, config, {})
369+
ensure_clean_benchmark_folder(config.results_dir)
370+
371+
benchmark = Benchmark.from_json(config, benchmark_specs)
372+
373+
# benchmark.run() returns True if the return code of memtier_benchmark was 0
374+
memtier_ok = benchmark.run()
375+
376+
master_nodes_connections = env.getOSSMasterNodesConnectionList()
377+
merged_command_stats = {'cmdstat_set': {'calls': 0}, 'cmdstat_get': {'calls': 0}}
378+
overall_request_count = agg_info_commandstats(master_nodes_connections, merged_command_stats)
379+
assert_minimum_memtier_outcomes(config, env, memtier_ok, overall_expected_request_count, overall_request_count)
380+
381+
353382
# run each test on different env
354383
def test_default_set_get_3_runs(env):
355384
run_count = 3

0 commit comments

Comments
 (0)