Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7d0c13d
statement-store: defer statement protocol connections during major sync
DenzelPenzel Mar 24, 2026
c8a9ed7
statement-store: major sync tests
DenzelPenzel Mar 26, 2026
a382525
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Mar 26, 2026
7cedec4
statement-store: clippy
DenzelPenzel Mar 27, 2026
5a8e3f1
statement-store: fix spawn_network to support single-collator tests,
DenzelPenzel Mar 27, 2026
b87cd17
Update from github-actions[bot] running command 'prdoc --audience nod…
github-actions[bot] Mar 27, 2026
8db0b1a
statement-store: use BTreeMap for deferred_peers
DenzelPenzel Mar 31, 2026
2120c5d
statement-store: clippy
DenzelPenzel Mar 31, 2026
038e8e5
Merge branch 'master' into denzelpenzel/statement-store-loss-during-m…
DenzelPenzel Mar 31, 2026
0576365
statement-store: fix deferred peer race and extract sync transition c…
DenzelPenzel Apr 1, 2026
b0a6db0
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 2, 2026
2934407
statement-store: address review feedback
DenzelPenzel Apr 2, 2026
acf6280
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 2, 2026
6e62495
statement-store: reconnect peers after major sync for statement recovery
DenzelPenzel Apr 7, 2026
20a4386
statement-store: update prdoc to reflect simplified approach
DenzelPenzel Apr 7, 2026
037cc4a
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 10, 2026
6ffe8e7
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 10, 2026
39dc866
statement-store: fix conflicts
DenzelPenzel Apr 10, 2026
4903081
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 10, 2026
85aecdc
statement-store: added the new ci matrix test
DenzelPenzel Apr 10, 2026
07e6433
Merge remote-tracking branch 'origin/denzelpenzel/statement-store-los…
DenzelPenzel Apr 10, 2026
fe16cee
statement-store: improve reconnect_statement_peers robustness and tes…
DenzelPenzel Apr 10, 2026
79890d7
statement-store: recover statements lost during major sync
DenzelPenzel Apr 13, 2026
e2118dd
statement-store: check statements
DenzelPenzel Apr 13, 2026
a8d7939
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 13, 2026
b69e515
statement-store: incr timeout
DenzelPenzel Apr 13, 2026
b076543
Merge branch 'denzelpenzel/statement-store-loss-during-major-sync' of…
DenzelPenzel Apr 13, 2026
fc46117
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 13, 2026
2af5257
statement-store: improve naming
DenzelPenzel Apr 14, 2026
c96de87
statement-store: buffer peers during major sync instead of remove+add…
DenzelPenzel Apr 16, 2026
922309e
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 16, 2026
637f1b0
statement-store: unit test with_syncing
DenzelPenzel Apr 16, 2026
1803dab
statement-store: update prdoc
DenzelPenzel Apr 16, 2026
e09bd4c
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 16, 2026
0a86fbf
statement-store: recover statements dropped during major sync
DenzelPenzel Apr 16, 2026
76f2487
Merge remote-tracking branch 'origin/denzelpenzel/statement-store-los…
DenzelPenzel Apr 16, 2026
6e86175
statement-store: improve recovery integration test coverage
DenzelPenzel Apr 16, 2026
6a90c6b
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/zombienet-tests/zombienet_cumulus_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,9 @@
runner-type: "default"
cumulus-image: "test-parachain"
use-zombienet-sdk: true

- job-name: "zombienet-cumulus-0019-statement_store_recovery_after_major_sync"
test-filter: "zombie_ci::statement_store::integration::statement_store_recovery_after_major_sync"
runner-type: "default"
cumulus-image: "test-parachain"
use-zombienet-sdk: true
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ pub(super) async fn spawn_network_with_injected_allowances(
collators: &[&str],
participant_count: u32,
) -> Result<Network<LocalFileSystem>, anyhow::Error> {
assert!(collators.len() >= 2);
assert!(!collators.is_empty());
let images = zombienet_sdk::environment::get_images_from_env();

let base_dir = std::env::var("ZOMBIENET_SDK_BASE_DIR")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashSet;
use std::{cell::Cell, collections::HashSet};

use codec::Encode;
use log::{debug, info};
Expand Down Expand Up @@ -381,3 +381,102 @@ async fn statement_store_crash_mid_sync() -> Result<(), anyhow::Error> {
info!("Node crash recovery test passed");
Ok(())
}

/// Verifies the `deferred_peers` buffer delivers statements to a late-joining node.
///
/// Dave joins after charlie has produced ~10 blocks and enters major sync. While syncing,
/// dave's statement handler holds charlie/alice's peer IDs in `deferred_peers` — no statement
/// substream opens until sync ends. Statements submitted both before and during dave's sync
/// window must all arrive via the single initial sync that fires when `drain_deferred_peers`
/// runs on sync completion.
#[tokio::test(flavor = "multi_thread")]
async fn statement_store_recovery_after_major_sync() -> Result<(), anyhow::Error> {
let _ = env_logger::try_init_from_env(
env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"),
);

const PRE_JOIN_COUNT: usize = 3;
const DURING_SYNC_COUNT: usize = 2;
const TOTAL: usize = PRE_JOIN_COUNT + DURING_SYNC_COUNT;
let items = create_allowance_items(&[(
0,
StatementAllowance { max_count: TOTAL as u32, max_size: 1_000_000 },
)]);
let mut network = spawn_network_sudo(&["charlie", "alice"], items).await?;
Comment thread
DenzelPenzel marked this conversation as resolved.

let charlie = network.get_node("charlie")?;
let charlie_rpc = charlie.rpc().await?;

// Wait for at least 10 blocks so any late joiner reliably enters major sync
let charlie_height = {
let h = Cell::new(0.0f64);
charlie
.wait_metric_with_timeout(
"block_height{status=\"best\"}",
|v| {
h.set(v);
v >= 10.0
},
180u64,
)
.await
.map_err(|_| anyhow::anyhow!("Charlie did not reach block 10 within 180s"))?;
h.get()
};
info!("Charlie at block {:.0} before dave joins", charlie_height);

let topic: Topic = [0u8; 32].into();
let keypair = get_keypair(0);
let pre_join: Vec<_> = (0..PRE_JOIN_COUNT as u32)
.map(|seq| create_test_statement(&keypair, &[topic], None, vec![seq as u8], u32::MAX, seq))
.collect();
for stmt in &pre_join {
assert_eq!(submit_statement(&charlie_rpc, stmt).await?, SubmitResult::New);
}

info!("Adding dave as late-joining collator");
let dave_join_time = std::time::Instant::now();
network.add_collator("dave", Default::default(), 1004).await?;
let dave = network.get_node("dave")?;
let dave_rpc = dave.rpc().await?;

// Subscribe immediately after dave starts — the deferred_peers buffer prevents any
// substream from opening while dave is syncing, so this subscription starts empty.
let mut sub = subscribe_topic(&dave_rpc, topic).await?;

// Dave holds charlie/alice's peer IDs in deferred_peers during sync; on sync-end
// drain fires, substream opens, and charlie's initial sync delivers both batches
let during_sync: Vec<_> = (PRE_JOIN_COUNT as u32..(PRE_JOIN_COUNT + DURING_SYNC_COUNT) as u32)
.map(|seq| create_test_statement(&keypair, &[topic], None, vec![seq as u8], u32::MAX, seq))
.collect();
for stmt in &during_sync {
assert_eq!(submit_statement(&charlie_rpc, stmt).await?, SubmitResult::New);
}

dave.wait_metric_with_timeout("block_height{status=\"best\"}", |h| h >= charlie_height, 120u64)
.await
.map_err(|_| {
anyhow::anyhow!("Dave did not reach block height {:.0} within 120s", charlie_height)
})?;
let sync_end = dave_join_time.elapsed();
info!("Dave synced to block {:.0} in {:.1}s", charlie_height, sync_end.as_secs_f64());

let received = expect_statements_unordered(&mut sub, TOTAL, 30).await?;
let mut expected: Vec<Vec<u8>> =
pre_join.iter().chain(during_sync.iter()).map(|s| s.encode()).collect();
expected.sort();
let mut received_bytes: Vec<Vec<u8>> = received.into_iter().map(|b| b.to_vec()).collect();
received_bytes.sort();
assert_eq!(received_bytes, expected, "Dave must receive all {TOTAL} statements after sync");
info!(
"All {TOTAL} statements ({PRE_JOIN_COUNT} pre-join + {DURING_SYNC_COUNT} during-sync) \
arrived {:.1}s after dave finished syncing",
dave_join_time.elapsed().as_secs_f64() - sync_end.as_secs_f64(),
);

// Verify drain_deferred_peers fired
let dave_logs = dave.logs().await?;
assert!(dave_logs.lines().any(|l| l.contains("Major sync complete, adding")));

Ok(())
}
18 changes: 18 additions & 0 deletions prdoc/pr_11487.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
title: 'statement-store: fix statement loss during major sync'
doc:
- audience: Node Dev
description: |-
Fixes two cases where statements are permanently lost during major sync:

**New peers (fresh start):** peers connecting while major sync is active are now buffered
in `deferred_peers` instead of being added to the reserved set immediately. When sync ends,
all buffered peers are added in one call so each performs a fresh initial sync. This removes
the previous `remove+add` reconnect that caused a 5-second backoff and peerset races.

**Already-connected peers (fall-behind):** when a running node temporarily falls behind
5+ blocks, statements from already-connected peers are dropped by the `is_major_syncing()`
guard with no recovery path. On sync-end, one random peer is force-disconnected and
re-added after a 60-second window so it re-sends its statements via initial sync.
crates:
- name: sc-network-statement
bump: patch
1 change: 1 addition & 0 deletions substrate/client/network/statement/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ futures = { workspace = true }
governor = { workspace = true }
log = { workspace = true, default-features = true }
prometheus-endpoint = { workspace = true, default-features = true }
rand = { workspace = true, default-features = true }
sc-network = { workspace = true, default-features = true }
sc-network-common = { workspace = true, default-features = true }
sc-network-sync = { workspace = true, default-features = true }
Expand Down
Loading
Loading