Skip to content

Commit 8367a45

Browse files
jframematthew1001
authored andcommitted
World state halt and chain halt fixes (besu-eth#7027)
Signed-off-by: Jason Frame <jason.frame@consensys.net> Signed-off-by: stefan.pingel@consensys.net <stefan.pingel@consensys.net>
1 parent e5ccac7 commit 8367a45

File tree

13 files changed

+145
-57
lines changed

13 files changed

+145
-57
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
- Snap client fixes discovered during snap server testing [#6847](https://github.com/hyperledger/besu/pull/6847)
5656
- Correctly initialize the txpool as disabled on creation [#6890](https://github.com/hyperledger/besu/pull/6890)
5757
- Fix worldstate download halt when using snap sync during initial sync [#6981](https://github.com/hyperledger/besu/pull/6981)
58+
- Fix chain halt due to peers only partially responding with headers. And worldstate halts caused by a halt in the chain sync [#7027](https://github.com/hyperledger/besu/pull/7027)
5859

5960
### Download Links
6061

besu/src/main/java/org/hyperledger/besu/cli/options/unstable/SynchronizerOptions.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ public class SynchronizerOptions implements CLIOptions<SynchronizerConfiguration
3939
"--Xsynchronizer-downloader-header-request-size";
4040
private static final String DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED_FLAG =
4141
"--Xsynchronizer-downloader-checkpoint-timeouts-permitted";
42+
private static final String DOWNLOADER_CHECKPOINT_RETRIES_FLAG =
43+
"--Xsynchronizer-downloader-checkpoint-RETRIES";
4244
private static final String DOWNLOADER_CHAIN_SEGMENT_SIZE_FLAG =
4345
"--Xsynchronizer-downloader-chain-segment-size";
4446
private static final String DOWNLOADER_PARALLELISM_FLAG =
@@ -132,12 +134,12 @@ public void parseBlockPropagationRange(final String arg) {
132134
SynchronizerConfiguration.DEFAULT_DOWNLOADER_HEADER_REQUEST_SIZE;
133135

134136
@CommandLine.Option(
135-
names = DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED_FLAG,
137+
names = {DOWNLOADER_CHECKPOINT_RETRIES_FLAG, DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED_FLAG},
136138
hidden = true,
137139
paramLabel = "<INTEGER>",
138140
description =
139141
"Number of tries to attempt to download checkpoints before stopping (default: ${DEFAULT-VALUE})")
140-
private int downloaderCheckpointTimeoutsPermitted =
142+
private int downloaderCheckpointRetries =
141143
SynchronizerConfiguration.DEFAULT_DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED;
142144

143145
@CommandLine.Option(
@@ -354,8 +356,7 @@ public static SynchronizerOptions fromConfig(final SynchronizerConfiguration con
354356
config.getDownloaderChangeTargetThresholdByHeight();
355357
options.downloaderChangeTargetThresholdByTd = config.getDownloaderChangeTargetThresholdByTd();
356358
options.downloaderHeaderRequestSize = config.getDownloaderHeaderRequestSize();
357-
options.downloaderCheckpointTimeoutsPermitted =
358-
config.getDownloaderCheckpointTimeoutsPermitted();
359+
options.downloaderCheckpointRetries = config.getDownloaderCheckpointRetries();
359360
options.downloaderChainSegmentSize = config.getDownloaderChainSegmentSize();
360361
options.downloaderParallelism = config.getDownloaderParallelism();
361362
options.transactionsParallelism = config.getTransactionsParallelism();
@@ -394,7 +395,7 @@ public SynchronizerConfiguration.Builder toDomainObject() {
394395
builder.downloaderChangeTargetThresholdByHeight(downloaderChangeTargetThresholdByHeight);
395396
builder.downloaderChangeTargetThresholdByTd(downloaderChangeTargetThresholdByTd);
396397
builder.downloaderHeadersRequestSize(downloaderHeaderRequestSize);
397-
builder.downloaderCheckpointTimeoutsPermitted(downloaderCheckpointTimeoutsPermitted);
398+
builder.downloaderCheckpointRetries(downloaderCheckpointRetries);
398399
builder.downloaderChainSegmentSize(downloaderChainSegmentSize);
399400
builder.downloaderParallelism(downloaderParallelism);
400401
builder.transactionsParallelism(transactionsParallelism);
@@ -436,7 +437,7 @@ public List<String> getCLIOptions() {
436437
DOWNLOADER_HEADER_REQUEST_SIZE_FLAG,
437438
OptionParser.format(downloaderHeaderRequestSize),
438439
DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED_FLAG,
439-
OptionParser.format(downloaderCheckpointTimeoutsPermitted),
440+
OptionParser.format(downloaderCheckpointRetries),
440441
DOWNLOADER_CHAIN_SEGMENT_SIZE_FLAG,
441442
OptionParser.format(downloaderChainSegmentSize),
442443
DOWNLOADER_PARALLELISM_FLAG,

besu/src/test/java/org/hyperledger/besu/cli/options/SynchronizerOptionsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ protected SynchronizerConfiguration.Builder createCustomizedDomainObject() {
6060
SynchronizerConfiguration.DEFAULT_DOWNLOADER_CHANGE_TARGET_THRESHOLD_BY_TD.add(2L))
6161
.downloaderHeadersRequestSize(
6262
SynchronizerConfiguration.DEFAULT_DOWNLOADER_HEADER_REQUEST_SIZE + 2)
63-
.downloaderCheckpointTimeoutsPermitted(
63+
.downloaderCheckpointRetries(
6464
SynchronizerConfiguration.DEFAULT_DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED + 2)
6565
.downloaderChainSegmentSize(
6666
SynchronizerConfiguration.DEFAULT_DOWNLOADER_CHAIN_SEGMENT_SIZE + 2)

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/SynchronizerConfiguration.java

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ public class SynchronizerConfiguration {
7777
private final long downloaderChangeTargetThresholdByHeight;
7878
private final UInt256 downloaderChangeTargetThresholdByTd;
7979
private final int downloaderHeaderRequestSize;
80-
private final int downloaderCheckpointTimeoutsPermitted;
80+
private final int downloaderCheckpointRetries;
8181
private final int downloaderChainSegmentSize;
8282
private final int downloaderParallelism;
8383
private final int transactionsParallelism;
@@ -101,7 +101,7 @@ private SynchronizerConfiguration(
101101
final long downloaderChangeTargetThresholdByHeight,
102102
final UInt256 downloaderChangeTargetThresholdByTd,
103103
final int downloaderHeaderRequestSize,
104-
final int downloaderCheckpointTimeoutsPermitted,
104+
final int downloaderCheckpointRetries,
105105
final int downloaderChainSegmentSize,
106106
final int downloaderParallelism,
107107
final int transactionsParallelism,
@@ -123,7 +123,7 @@ private SynchronizerConfiguration(
123123
this.downloaderChangeTargetThresholdByHeight = downloaderChangeTargetThresholdByHeight;
124124
this.downloaderChangeTargetThresholdByTd = downloaderChangeTargetThresholdByTd;
125125
this.downloaderHeaderRequestSize = downloaderHeaderRequestSize;
126-
this.downloaderCheckpointTimeoutsPermitted = downloaderCheckpointTimeoutsPermitted;
126+
this.downloaderCheckpointRetries = downloaderCheckpointRetries;
127127
this.downloaderChainSegmentSize = downloaderChainSegmentSize;
128128
this.downloaderParallelism = downloaderParallelism;
129129
this.transactionsParallelism = transactionsParallelism;
@@ -191,8 +191,8 @@ public int getDownloaderHeaderRequestSize() {
191191
return downloaderHeaderRequestSize;
192192
}
193193

194-
public int getDownloaderCheckpointTimeoutsPermitted() {
195-
return downloaderCheckpointTimeoutsPermitted;
194+
public int getDownloaderCheckpointRetries() {
195+
return downloaderCheckpointRetries;
196196
}
197197

198198
public int getDownloaderChainSegmentSize() {
@@ -264,8 +264,7 @@ public static class Builder {
264264
private UInt256 downloaderChangeTargetThresholdByTd =
265265
DEFAULT_DOWNLOADER_CHANGE_TARGET_THRESHOLD_BY_TD;
266266
private int downloaderHeaderRequestSize = DEFAULT_DOWNLOADER_HEADER_REQUEST_SIZE;
267-
private int downloaderCheckpointTimeoutsPermitted =
268-
DEFAULT_DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED;
267+
private int downloaderCheckpointRetries = DEFAULT_DOWNLOADER_CHECKPOINT_TIMEOUTS_PERMITTED;
269268
private SnapSyncConfiguration snapSyncConfiguration = SnapSyncConfiguration.getDefault();
270269
private int downloaderChainSegmentSize = DEFAULT_DOWNLOADER_CHAIN_SEGMENT_SIZE;
271270
private int downloaderParallelism = DEFAULT_DOWNLOADER_PARALLELISM;
@@ -327,9 +326,8 @@ public Builder downloaderHeadersRequestSize(final int downloaderHeaderRequestSiz
327326
return this;
328327
}
329328

330-
public Builder downloaderCheckpointTimeoutsPermitted(
331-
final int downloaderCheckpointTimeoutsPermitted) {
332-
this.downloaderCheckpointTimeoutsPermitted = downloaderCheckpointTimeoutsPermitted;
329+
public Builder downloaderCheckpointRetries(final int downloaderCheckpointRetries) {
330+
this.downloaderCheckpointRetries = downloaderCheckpointRetries;
333331
return this;
334332
}
335333

@@ -422,7 +420,7 @@ public SynchronizerConfiguration build() {
422420
downloaderChangeTargetThresholdByHeight,
423421
downloaderChangeTargetThresholdByTd,
424422
downloaderHeaderRequestSize,
425-
downloaderCheckpointTimeoutsPermitted,
423+
downloaderCheckpointRetries,
426424
downloaderChainSegmentSize,
427425
downloaderParallelism,
428426
transactionsParallelism,

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/fastsync/FastSyncDownloadPipelineFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ public Pipeline<SyncTargetRange> createDownloadPipelineForSyncTarget(final SyncT
125125
ethContext.getScheduler(),
126126
target.peer(),
127127
getCommonAncestor(target),
128-
syncConfig.getDownloaderCheckpointTimeoutsPermitted(),
128+
syncConfig.getDownloaderCheckpointRetries(),
129129
SyncTerminationCondition.never());
130130
final DownloadHeadersStep downloadHeadersStep =
131131
new DownloadHeadersStep(

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/fastsync/PivotSelectorFromSafeBlock.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
import java.util.Optional;
3030
import java.util.concurrent.CompletableFuture;
31+
import java.util.concurrent.TimeUnit;
3132
import java.util.function.Supplier;
3233

3334
import org.slf4j.Logger;
@@ -127,7 +128,7 @@ public long getBestChainHeight() {
127128
maybeCachedHeadBlockHeader = Optional.of(blockHeader);
128129
return blockHeader.getNumber();
129130
})
130-
.get();
131+
.get(20, TimeUnit.SECONDS);
131132
} catch (Throwable t) {
132133
LOG.debug(
133134
"Error trying to download chain head block header by hash {}",

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/fullsync/FullSyncDownloadPipelineFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ public Pipeline<?> createDownloadPipelineForSyncTarget(final SyncTarget target)
9191
ethContext.getScheduler(),
9292
target.peer(),
9393
target.commonAncestor(),
94-
syncConfig.getDownloaderCheckpointTimeoutsPermitted(),
94+
syncConfig.getDownloaderCheckpointRetries(),
9595
fullSyncTerminationCondition);
9696
final DownloadHeadersStep downloadHeadersStep =
9797
new DownloadHeadersStep(

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/range/SyncTargetRangeSource.java

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.hyperledger.besu.ethereum.eth.manager.EthPeer;
2323
import org.hyperledger.besu.ethereum.eth.manager.EthScheduler;
2424
import org.hyperledger.besu.ethereum.eth.sync.fullsync.SyncTerminationCondition;
25+
import org.hyperledger.besu.ethereum.p2p.rlpx.wire.messages.DisconnectMessage;
2526

2627
import java.time.Duration;
2728
import java.util.ArrayDeque;
@@ -44,31 +45,31 @@ public class SyncTargetRangeSource implements Iterator<SyncTargetRange> {
4445
private final SyncTargetChecker syncTargetChecker;
4546
private final EthPeer peer;
4647
private final EthScheduler ethScheduler;
47-
private final int rangeTimeoutsPermitted;
48+
private final int retriesPermitted;
4849
private final Duration newHeaderWaitDuration;
4950
private final SyncTerminationCondition terminationCondition;
5051

5152
private final Queue<SyncTargetRange> retrievedRanges = new ArrayDeque<>();
5253
private BlockHeader lastRangeEnd;
5354
private boolean reachedEndOfRanges = false;
5455
private Optional<CompletableFuture<List<BlockHeader>>> pendingRequests = Optional.empty();
55-
private int requestFailureCount = 0;
56+
private int retryCount = 0;
5657

5758
public SyncTargetRangeSource(
5859
final RangeHeadersFetcher fetcher,
5960
final SyncTargetChecker syncTargetChecker,
6061
final EthScheduler ethScheduler,
6162
final EthPeer peer,
6263
final BlockHeader commonAncestor,
63-
final int rangeTimeoutsPermitted,
64+
final int retriesPermitted,
6465
final SyncTerminationCondition terminationCondition) {
6566
this(
6667
fetcher,
6768
syncTargetChecker,
6869
ethScheduler,
6970
peer,
7071
commonAncestor,
71-
rangeTimeoutsPermitted,
72+
retriesPermitted,
7273
Duration.ofSeconds(5),
7374
terminationCondition);
7475
}
@@ -79,15 +80,15 @@ public SyncTargetRangeSource(
7980
final EthScheduler ethScheduler,
8081
final EthPeer peer,
8182
final BlockHeader commonAncestor,
82-
final int rangeTimeoutsPermitted,
83+
final int retriesPermitted,
8384
final Duration newHeaderWaitDuration,
8485
final SyncTerminationCondition terminationCondition) {
8586
this.fetcher = fetcher;
8687
this.syncTargetChecker = syncTargetChecker;
8788
this.ethScheduler = ethScheduler;
8889
this.peer = peer;
8990
this.lastRangeEnd = commonAncestor;
90-
this.rangeTimeoutsPermitted = rangeTimeoutsPermitted;
91+
this.retriesPermitted = retriesPermitted;
9192
this.newHeaderWaitDuration = newHeaderWaitDuration;
9293
this.terminationCondition = terminationCondition;
9394
}
@@ -96,7 +97,7 @@ public SyncTargetRangeSource(
9697
public boolean hasNext() {
9798
return terminationCondition.shouldContinueDownload()
9899
&& (!retrievedRanges.isEmpty()
99-
|| (requestFailureCount < rangeTimeoutsPermitted
100+
|| (retryCount < retriesPermitted
100101
&& syncTargetChecker.shouldContinueDownloadingFromSyncTarget(peer, lastRangeEnd)
101102
&& !reachedEndOfRanges));
102103
}
@@ -148,13 +149,21 @@ private SyncTargetRange getRangeFromPendingRequest() {
148149
pendingRequest.get(newHeaderWaitDuration.toMillis(), MILLISECONDS);
149150
this.pendingRequests = Optional.empty();
150151
if (newHeaders.isEmpty()) {
151-
requestFailureCount++;
152+
retryCount++;
153+
if (retryCount >= retriesPermitted) {
154+
LOG.atDebug()
155+
.setMessage(
156+
"Disconnecting target peer for providing useless or empty range header: {}.")
157+
.addArgument(peer)
158+
.log();
159+
peer.disconnect(DisconnectMessage.DisconnectReason.USELESS_PEER_USELESS_RESPONSES);
160+
}
152161
} else {
153-
requestFailureCount = 0;
154-
}
155-
for (final BlockHeader header : newHeaders) {
156-
retrievedRanges.add(new SyncTargetRange(peer, lastRangeEnd, header));
157-
lastRangeEnd = header;
162+
retryCount = 0;
163+
for (final BlockHeader header : newHeaders) {
164+
retrievedRanges.add(new SyncTargetRange(peer, lastRangeEnd, header));
165+
lastRangeEnd = header;
166+
}
158167
}
159168
return retrievedRanges.poll();
160169
} catch (final InterruptedException e) {
@@ -163,7 +172,7 @@ private SyncTargetRange getRangeFromPendingRequest() {
163172
} catch (final ExecutionException e) {
164173
LOG.debug("Failed to retrieve new range headers", e);
165174
this.pendingRequests = Optional.empty();
166-
requestFailureCount++;
175+
retryCount++;
167176
return null;
168177
} catch (final TimeoutException e) {
169178
return null;

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/snapsync/DynamicPivotBlockSelector.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ private CompletableFuture<Void> downloadNewPivotBlock(final FastSyncState fss) {
161161
.addArgument(this::logLastPivotBlockFound)
162162
.log();
163163
})
164-
.orTimeout(5, TimeUnit.MINUTES);
164+
.orTimeout(20, TimeUnit.SECONDS);
165165
}
166166

167167
private boolean isSamePivotBlock(final FastSyncState fss) {

ethereum/eth/src/main/java/org/hyperledger/besu/ethereum/eth/sync/snapsync/SnapWorldDownloadState.java

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.hyperledger.besu.ethereum.chain.BlockAddedObserver;
2222
import org.hyperledger.besu.ethereum.chain.Blockchain;
2323
import org.hyperledger.besu.ethereum.core.BlockHeader;
24+
import org.hyperledger.besu.ethereum.eth.manager.EthContext;
2425
import org.hyperledger.besu.ethereum.eth.sync.snapsync.context.SnapSyncStatePersistenceManager;
2526
import org.hyperledger.besu.ethereum.eth.sync.snapsync.request.AccountRangeDataRequest;
2627
import org.hyperledger.besu.ethereum.eth.sync.snapsync.request.BytecodeRequest;
@@ -86,6 +87,7 @@ public class SnapWorldDownloadState extends WorldDownloadState<SnapDataRequest>
8687
// blockchain
8788
private final Blockchain blockchain;
8889
private final Long blockObserverId;
90+
private final EthContext ethContext;
8991

9092
// metrics around the snapsync
9193
private final SnapSyncMetricsManager metricsManager;
@@ -99,7 +101,8 @@ public SnapWorldDownloadState(
99101
final int maxRequestsWithoutProgress,
100102
final long minMillisBeforeStalling,
101103
final SnapSyncMetricsManager metricsManager,
102-
final Clock clock) {
104+
final Clock clock,
105+
final EthContext ethContext) {
103106
super(
104107
worldStateStorageCoordinator,
105108
pendingRequests,
@@ -111,6 +114,7 @@ public SnapWorldDownloadState(
111114
this.snapSyncState = snapSyncState;
112115
this.metricsManager = metricsManager;
113116
this.blockObserverId = blockchain.observeBlockAdded(createBlockchainObserver());
117+
this.ethContext = ethContext;
114118

115119
metricsManager
116120
.getMetricsSystem()
@@ -423,23 +427,29 @@ public void setPivotBlockSelector(final DynamicPivotBlockSelector pivotBlockSele
423427
}
424428

425429
public BlockAddedObserver createBlockchainObserver() {
426-
return addedBlockContext -> {
427-
final AtomicBoolean foundNewPivotBlock = new AtomicBoolean(false);
428-
pivotBlockSelector.check(
429-
(____, isNewPivotBlock) -> {
430-
if (isNewPivotBlock) {
431-
foundNewPivotBlock.set(true);
432-
}
433-
});
434-
435-
final boolean isNewPivotBlockFound = foundNewPivotBlock.get();
436-
final boolean isBlockchainCaughtUp =
437-
snapSyncState.isWaitingBlockchain() && !pivotBlockSelector.isBlockchainBehind();
438-
439-
if (snapSyncState.isHealTrieInProgress() && (isNewPivotBlockFound || isBlockchainCaughtUp)) {
440-
snapSyncState.setWaitingBlockchain(false);
441-
reloadTrieHeal();
442-
}
443-
};
430+
return addedBlockContext ->
431+
ethContext
432+
.getScheduler()
433+
.executeServiceTask(
434+
() -> {
435+
final AtomicBoolean foundNewPivotBlock = new AtomicBoolean(false);
436+
pivotBlockSelector.check(
437+
(____, isNewPivotBlock) -> {
438+
if (isNewPivotBlock) {
439+
foundNewPivotBlock.set(true);
440+
}
441+
});
442+
443+
final boolean isNewPivotBlockFound = foundNewPivotBlock.get();
444+
final boolean isBlockchainCaughtUp =
445+
snapSyncState.isWaitingBlockchain()
446+
&& !pivotBlockSelector.isBlockchainBehind();
447+
448+
if (snapSyncState.isHealTrieInProgress()
449+
&& (isNewPivotBlockFound || isBlockchainCaughtUp)) {
450+
snapSyncState.setWaitingBlockchain(false);
451+
reloadTrieHeal();
452+
}
453+
});
444454
}
445455
}

0 commit comments

Comments
 (0)