Skip to content

Commit b1f2ff8

Browse files
Make multiple settings dynamic for tuning on larger clusters (#16347) (#16442)
(cherry picked from commit ca40ba4) Signed-off-by: Rahul Karajgikar <[email protected]> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 41258e7 commit b1f2ff8

File tree

9 files changed

+212
-15
lines changed

9 files changed

+212
-15
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6868
- Optimise clone operation for incremental full cluster snapshots ([#16296](https://github.com/opensearch-project/OpenSearch/pull/16296))
6969
- Code cleanup: Remove ApproximateIndexOrDocValuesQuery ([#16273](https://github.com/opensearch-project/OpenSearch/pull/16273))
7070
- Update last seen cluster state in the commit phase ([#16215](https://github.com/opensearch-project/OpenSearch/pull/16215))
71+
- Make multiple settings dynamic for tuning on larger clusters ([#16347](https://github.com/opensearch-project/OpenSearch/pull/16347))
7172

7273
### Deprecated
7374

server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
142142
"cluster.publish.timeout",
143143
TimeValue.timeValueMillis(30000),
144144
TimeValue.timeValueMillis(1),
145-
Setting.Property.NodeScope
145+
Setting.Property.NodeScope,
146+
Setting.Property.Dynamic
146147
);
147148

148149
private final Settings settings;
@@ -165,7 +166,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
165166
private final Random random;
166167
private final ElectionSchedulerFactory electionSchedulerFactory;
167168
private final SeedHostsResolver configuredHostsResolver;
168-
private final TimeValue publishTimeout;
169+
private TimeValue publishTimeout;
169170
private final TimeValue publishInfoTimeout;
170171
private final PublicationTransportHandler publicationHandler;
171172
private final LeaderChecker leaderChecker;
@@ -248,6 +249,7 @@ public Coordinator(
248249
this.lastJoin = Optional.empty();
249250
this.joinAccumulator = new InitialJoinAccumulator();
250251
this.publishTimeout = PUBLISH_TIMEOUT_SETTING.get(settings);
252+
clusterSettings.addSettingsUpdateConsumer(PUBLISH_TIMEOUT_SETTING, this::setPublishTimeout);
251253
this.publishInfoTimeout = PUBLISH_INFO_TIMEOUT_SETTING.get(settings);
252254
this.random = random;
253255
this.electionSchedulerFactory = new ElectionSchedulerFactory(settings, random, transportService.getThreadPool());
@@ -302,6 +304,7 @@ public Coordinator(
302304
);
303305
this.lagDetector = new LagDetector(
304306
settings,
307+
clusterSettings,
305308
transportService.getThreadPool(),
306309
n -> removeNode(n, "lagging"),
307310
transportService::getLocalNode
@@ -320,6 +323,10 @@ public Coordinator(
320323
this.clusterSettings = clusterSettings;
321324
}
322325

326+
private void setPublishTimeout(TimeValue publishTimeout) {
327+
this.publishTimeout = publishTimeout;
328+
}
329+
323330
private ClusterFormationState getClusterFormationState() {
324331
return new ClusterFormationState(
325332
settings,
@@ -1670,7 +1677,6 @@ public void onNodeAck(DiscoveryNode node, Exception e) {
16701677
this.localNodeAckEvent = localNodeAckEvent;
16711678
this.ackListener = ackListener;
16721679
this.publishListener = publishListener;
1673-
16741680
this.timeoutHandler = singleNodeDiscovery ? null : transportService.getThreadPool().schedule(new Runnable() {
16751681
@Override
16761682
public void run() {

server/src/main/java/org/opensearch/cluster/coordination/ElectionSchedulerFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ protected void doRun() {
214214
if (isClosed.get()) {
215215
logger.debug("{} not starting election", this);
216216
} else {
217-
logger.debug("{} starting election", this);
217+
logger.debug("{} starting election with duration {}", this, duration);
218218
scheduleNextElection(duration, scheduledRunnable);
219219
scheduledRunnable.run();
220220
}

server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,16 @@ public class FollowersChecker {
9292
"cluster.fault_detection.follower_check.interval",
9393
TimeValue.timeValueMillis(1000),
9494
TimeValue.timeValueMillis(100),
95-
Setting.Property.NodeScope
95+
Setting.Property.NodeScope,
96+
Setting.Property.Dynamic
9697
);
9798

9899
// the timeout for each check sent to each node
99100
public static final Setting<TimeValue> FOLLOWER_CHECK_TIMEOUT_SETTING = Setting.timeSetting(
100101
"cluster.fault_detection.follower_check.timeout",
101102
TimeValue.timeValueMillis(10000),
102103
TimeValue.timeValueMillis(1),
103-
TimeValue.timeValueMillis(60000),
104+
TimeValue.timeValueMillis(150000),
104105
Setting.Property.NodeScope,
105106
Setting.Property.Dynamic
106107
);
@@ -115,7 +116,7 @@ public class FollowersChecker {
115116

116117
private final Settings settings;
117118

118-
private final TimeValue followerCheckInterval;
119+
private TimeValue followerCheckInterval;
119120
private TimeValue followerCheckTimeout;
120121
private final int followerCheckRetryCount;
121122
private final BiConsumer<DiscoveryNode, String> onNodeFailure;
@@ -148,6 +149,7 @@ public FollowersChecker(
148149
followerCheckInterval = FOLLOWER_CHECK_INTERVAL_SETTING.get(settings);
149150
followerCheckTimeout = FOLLOWER_CHECK_TIMEOUT_SETTING.get(settings);
150151
followerCheckRetryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings);
152+
clusterSettings.addSettingsUpdateConsumer(FOLLOWER_CHECK_INTERVAL_SETTING, this::setFollowerCheckInterval);
151153
clusterSettings.addSettingsUpdateConsumer(FOLLOWER_CHECK_TIMEOUT_SETTING, this::setFollowerCheckTimeout);
152154
updateFastResponseState(0, Mode.CANDIDATE);
153155
transportService.registerRequestHandler(
@@ -167,6 +169,10 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti
167169
this.clusterManagerMetrics = clusterManagerMetrics;
168170
}
169171

172+
private void setFollowerCheckInterval(TimeValue followerCheckInterval) {
173+
this.followerCheckInterval = followerCheckInterval;
174+
}
175+
170176
private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) {
171177
this.followerCheckTimeout = followerCheckTimeout;
172178
}

server/src/main/java/org/opensearch/cluster/coordination/LagDetector.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.logging.log4j.LogManager;
3535
import org.apache.logging.log4j.Logger;
3636
import org.opensearch.cluster.node.DiscoveryNode;
37+
import org.opensearch.common.settings.ClusterSettings;
3738
import org.opensearch.common.settings.Setting;
3839
import org.opensearch.common.settings.Settings;
3940
import org.opensearch.common.unit.TimeValue;
@@ -68,23 +69,26 @@ public class LagDetector {
6869
"cluster.follower_lag.timeout",
6970
TimeValue.timeValueMillis(90000),
7071
TimeValue.timeValueMillis(1),
71-
Setting.Property.NodeScope
72+
Setting.Property.NodeScope,
73+
Setting.Property.Dynamic
7274
);
7375

74-
private final TimeValue clusterStateApplicationTimeout;
76+
private TimeValue clusterStateApplicationTimeout;
7577
private final Consumer<DiscoveryNode> onLagDetected;
7678
private final Supplier<DiscoveryNode> localNodeSupplier;
7779
private final ThreadPool threadPool;
7880
private final Map<DiscoveryNode, NodeAppliedStateTracker> appliedStateTrackersByNode = newConcurrentMap();
7981

8082
public LagDetector(
8183
final Settings settings,
84+
final ClusterSettings clusterSettings,
8285
final ThreadPool threadPool,
8386
final Consumer<DiscoveryNode> onLagDetected,
8487
final Supplier<DiscoveryNode> localNodeSupplier
8588
) {
8689
this.threadPool = threadPool;
8790
this.clusterStateApplicationTimeout = CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING.get(settings);
91+
clusterSettings.addSettingsUpdateConsumer(CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING, this::setFollowerLagTimeout);
8892
this.onLagDetected = onLagDetected;
8993
this.localNodeSupplier = localNodeSupplier;
9094
}
@@ -136,6 +140,10 @@ public String toString() {
136140
}
137141
}
138142

143+
private void setFollowerLagTimeout(TimeValue followerCheckLagTimeout) {
144+
this.clusterStateApplicationTimeout = followerCheckLagTimeout;
145+
}
146+
139147
@Override
140148
public String toString() {
141149
return "LagDetector{"

server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ public class ShardsBatchGatewayAllocator implements ExistingShardsAllocator {
7272

7373
public static final String ALLOCATOR_NAME = "shards_batch_gateway_allocator";
7474
private static final Logger logger = LogManager.getLogger(ShardsBatchGatewayAllocator.class);
75-
private final long maxBatchSize;
75+
private long maxBatchSize;
7676
private static final short DEFAULT_SHARD_BATCH_SIZE = 2000;
7777

7878
public static final String PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY =
@@ -93,7 +93,8 @@ public class ShardsBatchGatewayAllocator implements ExistingShardsAllocator {
9393
DEFAULT_SHARD_BATCH_SIZE,
9494
1,
9595
10000,
96-
Setting.Property.NodeScope
96+
Setting.Property.NodeScope,
97+
Setting.Property.Dynamic
9798
);
9899

99100
/**
@@ -172,6 +173,7 @@ public ShardsBatchGatewayAllocator(
172173
this.batchStartedAction = batchStartedAction;
173174
this.batchStoreAction = batchStoreAction;
174175
this.maxBatchSize = GATEWAY_ALLOCATOR_BATCH_SIZE.get(settings);
176+
clusterSettings.addSettingsUpdateConsumer(GATEWAY_ALLOCATOR_BATCH_SIZE, this::setMaxBatchSize);
175177
this.primaryShardsBatchGatewayAllocatorTimeout = PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(settings);
176178
clusterSettings.addSettingsUpdateConsumer(PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING, this::setPrimaryBatchAllocatorTimeout);
177179
this.replicaShardsBatchGatewayAllocatorTimeout = REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(settings);
@@ -402,6 +404,7 @@ else if (shardRouting.primary() == primary) {
402404
Iterator<ShardRouting> iterator = newShardsToBatch.values().iterator();
403405
assert maxBatchSize > 0 : "Shards batch size must be greater than 0";
404406

407+
logger.debug("Using async fetch batch size {}", maxBatchSize);
405408
long batchSize = maxBatchSize;
406409
Map<ShardId, ShardEntry> perBatchShards = new HashMap<>();
407410
while (iterator.hasNext()) {
@@ -906,6 +909,10 @@ public int getNumberOfStoreShardBatches() {
906909
return batchIdToStoreShardBatch.size();
907910
}
908911

912+
private void setMaxBatchSize(long maxBatchSize) {
913+
this.maxBatchSize = maxBatchSize;
914+
}
915+
909916
protected void setPrimaryBatchAllocatorTimeout(TimeValue primaryShardsBatchGatewayAllocatorTimeout) {
910917
this.primaryShardsBatchGatewayAllocatorTimeout = primaryShardsBatchGatewayAllocatorTimeout;
911918
}

server/src/test/java/org/opensearch/cluster/coordination/CoordinationCheckerSettingsTests.java

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
import org.opensearch.common.unit.TimeValue;
1515
import org.opensearch.test.OpenSearchSingleNodeTestCase;
1616

17+
import static org.opensearch.cluster.coordination.Coordinator.PUBLISH_TIMEOUT_SETTING;
18+
import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING;
1719
import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING;
20+
import static org.opensearch.cluster.coordination.LagDetector.CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING;
1821
import static org.opensearch.cluster.coordination.LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING;
1922
import static org.opensearch.common.unit.TimeValue.timeValueSeconds;
2023
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
@@ -42,10 +45,10 @@ public void testFollowerCheckTimeoutValueUpdate() {
4245

4346
public void testFollowerCheckTimeoutMaxValue() {
4447
Setting<TimeValue> setting1 = FOLLOWER_CHECK_TIMEOUT_SETTING;
45-
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "61s").build();
48+
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "151s").build();
4649

4750
assertThrows(
48-
"failed to parse value [61s] for setting [" + setting1.getKey() + "], must be <= [60000ms]",
51+
"failed to parse value [151s] for setting [" + setting1.getKey() + "], must be <= [150000ms]",
4952
IllegalArgumentException.class,
5053
() -> {
5154
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(timeSettings1).execute().actionGet();
@@ -66,6 +69,38 @@ public void testFollowerCheckTimeoutMinValue() {
6669
);
6770
}
6871

72+
public void testFollowerCheckIntervalValueUpdate() {
73+
Setting<TimeValue> setting1 = FOLLOWER_CHECK_INTERVAL_SETTING;
74+
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "10s").build();
75+
try {
76+
ClusterUpdateSettingsResponse response = client().admin()
77+
.cluster()
78+
.prepareUpdateSettings()
79+
.setPersistentSettings(timeSettings1)
80+
.execute()
81+
.actionGet();
82+
assertAcked(response);
83+
assertEquals(timeValueSeconds(10), setting1.get(response.getPersistentSettings()));
84+
} finally {
85+
// cleanup
86+
timeSettings1 = Settings.builder().putNull(setting1.getKey()).build();
87+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(timeSettings1).execute().actionGet();
88+
}
89+
}
90+
91+
public void testFollowerCheckIntervalMinValue() {
92+
Setting<TimeValue> setting1 = FOLLOWER_CHECK_INTERVAL_SETTING;
93+
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "10ms").build();
94+
95+
assertThrows(
96+
"failed to parse value [10ms] for setting [" + setting1.getKey() + "], must be >= [100ms]",
97+
IllegalArgumentException.class,
98+
() -> {
99+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(timeSettings1).execute().actionGet();
100+
}
101+
);
102+
}
103+
69104
public void testLeaderCheckTimeoutValueUpdate() {
70105
Setting<TimeValue> setting1 = LEADER_CHECK_TIMEOUT_SETTING;
71106
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "60s").build();
@@ -110,4 +145,70 @@ public void testLeaderCheckTimeoutMinValue() {
110145
}
111146
);
112147
}
148+
149+
public void testClusterPublishTimeoutValueUpdate() {
150+
Setting<TimeValue> setting1 = PUBLISH_TIMEOUT_SETTING;
151+
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "60s").build();
152+
try {
153+
ClusterUpdateSettingsResponse response = client().admin()
154+
.cluster()
155+
.prepareUpdateSettings()
156+
.setPersistentSettings(timeSettings1)
157+
.execute()
158+
.actionGet();
159+
assertAcked(response);
160+
assertEquals(timeValueSeconds(60), setting1.get(response.getPersistentSettings()));
161+
} finally {
162+
// cleanup
163+
timeSettings1 = Settings.builder().putNull(setting1.getKey()).build();
164+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(timeSettings1).execute().actionGet();
165+
}
166+
}
167+
168+
public void testClusterPublishTimeoutMinValue() {
169+
Setting<TimeValue> setting1 = PUBLISH_TIMEOUT_SETTING;
170+
Settings timeSettings1 = Settings.builder().put(setting1.getKey(), "0s").build();
171+
172+
assertThrows(
173+
"failed to parse value [0s] for setting [" + setting1.getKey() + "], must be >= [1ms]",
174+
IllegalArgumentException.class,
175+
() -> {
176+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(timeSettings1).execute().actionGet();
177+
}
178+
);
179+
}
180+
181+
public void testLagDetectorTimeoutUpdate() {
182+
Setting<TimeValue> setting1 = CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING;
183+
Settings lagDetectorTimeout = Settings.builder().put(setting1.getKey(), "30s").build();
184+
try {
185+
ClusterUpdateSettingsResponse response = client().admin()
186+
.cluster()
187+
.prepareUpdateSettings()
188+
.setPersistentSettings(lagDetectorTimeout)
189+
.execute()
190+
.actionGet();
191+
192+
assertAcked(response);
193+
assertEquals(timeValueSeconds(30), setting1.get(response.getPersistentSettings()));
194+
} finally {
195+
// cleanup
196+
lagDetectorTimeout = Settings.builder().putNull(setting1.getKey()).build();
197+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(lagDetectorTimeout).execute().actionGet();
198+
}
199+
}
200+
201+
public void testLagDetectorTimeoutMinValue() {
202+
Setting<TimeValue> setting1 = CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING;
203+
Settings lagDetectorTimeout = Settings.builder().put(setting1.getKey(), "0s").build();
204+
205+
assertThrows(
206+
"failed to parse value [0s] for setting [" + setting1.getKey() + "], must be >= [1ms]",
207+
IllegalArgumentException.class,
208+
() -> {
209+
client().admin().cluster().prepareUpdateSettings().setPersistentSettings(lagDetectorTimeout).execute().actionGet();
210+
}
211+
);
212+
}
213+
113214
}

server/src/test/java/org/opensearch/cluster/coordination/LagDetectorTests.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
package org.opensearch.cluster.coordination;
3333

3434
import org.opensearch.cluster.node.DiscoveryNode;
35+
import org.opensearch.common.settings.ClusterSettings;
3536
import org.opensearch.common.settings.Settings;
3637
import org.opensearch.common.unit.TimeValue;
3738
import org.opensearch.test.OpenSearchTestCase;
@@ -70,8 +71,9 @@ public void setupFixture() {
7071
} else {
7172
followerLagTimeout = CLUSTER_FOLLOWER_LAG_TIMEOUT_SETTING.get(Settings.EMPTY);
7273
}
73-
74-
lagDetector = new LagDetector(settingsBuilder.build(), deterministicTaskQueue.getThreadPool(), failedNodes::add, () -> localNode);
74+
Settings settings = settingsBuilder.build();
75+
final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
76+
lagDetector = new LagDetector(settings, clusterSettings, deterministicTaskQueue.getThreadPool(), failedNodes::add, () -> localNode);
7577

7678
localNode = CoordinationStateTests.createNode("local");
7779
node1 = CoordinationStateTests.createNode("node1");

0 commit comments

Comments
 (0)