Skip to content
Open
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7d0c13d
statement-store: defer statement protocol connections during major sync
DenzelPenzel Mar 24, 2026
c8a9ed7
statement-store: major sync tests
DenzelPenzel Mar 26, 2026
a382525
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Mar 26, 2026
7cedec4
statement-store: clippy
DenzelPenzel Mar 27, 2026
5a8e3f1
statement-store: fix spawn_network to support single-collator tests,
DenzelPenzel Mar 27, 2026
b87cd17
Update from github-actions[bot] running command 'prdoc --audience nod…
github-actions[bot] Mar 27, 2026
8db0b1a
statement-store: use BTreeMap for deferred_peers
DenzelPenzel Mar 31, 2026
2120c5d
statement-store: clippy
DenzelPenzel Mar 31, 2026
038e8e5
Merge branch 'master' into denzelpenzel/statement-store-loss-during-m…
DenzelPenzel Mar 31, 2026
0576365
statement-store: fix deferred peer race and extract sync transition c…
DenzelPenzel Apr 1, 2026
b0a6db0
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 2, 2026
2934407
statement-store: address review feedback
DenzelPenzel Apr 2, 2026
acf6280
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 2, 2026
6e62495
statement-store: reconnect peers after major sync for statement recovery
DenzelPenzel Apr 7, 2026
20a4386
statement-store: update prdoc to reflect simplified approach
DenzelPenzel Apr 7, 2026
037cc4a
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 10, 2026
6ffe8e7
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 10, 2026
39dc866
statement-store: fix conflicts
DenzelPenzel Apr 10, 2026
4903081
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 10, 2026
85aecdc
statement-store: added the new ci matrix test
DenzelPenzel Apr 10, 2026
07e6433
Merge remote-tracking branch 'origin/denzelpenzel/statement-store-los…
DenzelPenzel Apr 10, 2026
fe16cee
statement-store: improve reconnect_statement_peers robustness and tes…
DenzelPenzel Apr 10, 2026
79890d7
statement-store: recover statements lost during major sync
DenzelPenzel Apr 13, 2026
e2118dd
statement-store: check statements
DenzelPenzel Apr 13, 2026
a8d7939
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 13, 2026
b69e515
statement-store: incr timeout
DenzelPenzel Apr 13, 2026
b076543
Merge branch 'denzelpenzel/statement-store-loss-during-major-sync' of…
DenzelPenzel Apr 13, 2026
fc46117
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 13, 2026
2af5257
statement-store: improve naming
DenzelPenzel Apr 14, 2026
c96de87
statement-store: buffer peers during major sync instead of remove+add…
DenzelPenzel Apr 16, 2026
922309e
Merge remote-tracking branch 'origin/master' into denzelpenzel/statem…
DenzelPenzel Apr 16, 2026
637f1b0
statement-store: unit test with_syncing
DenzelPenzel Apr 16, 2026
1803dab
statement-store: update prdoc
DenzelPenzel Apr 16, 2026
e09bd4c
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 16, 2026
0a86fbf
statement-store: recover statements dropped during major sync
DenzelPenzel Apr 16, 2026
76f2487
Merge remote-tracking branch 'origin/denzelpenzel/statement-store-los…
DenzelPenzel Apr 16, 2026
6e86175
statement-store: improve recovery integration test coverage
DenzelPenzel Apr 16, 2026
6a90c6b
Update from github-actions[bot] running command 'fmt'
github-actions[bot] Apr 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/zombienet-tests/zombienet_cumulus_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,9 @@
runner-type: "default"
cumulus-image: "test-parachain"
use-zombienet-sdk: true

- job-name: "zombienet-cumulus-0019-statement_store_recovery_after_major_sync"
test-filter: "zombie_ci::statement_store::integration::statement_store_recovery_after_major_sync"
runner-type: "default"
cumulus-image: "test-parachain"
use-zombienet-sdk: true
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ pub(super) async fn spawn_network_with_injected_allowances(
collators: &[&str],
participant_count: u32,
) -> Result<Network<LocalFileSystem>, anyhow::Error> {
assert!(collators.len() >= 2);
assert!(!collators.is_empty());
let images = zombienet_sdk::environment::get_images_from_env();

let base_dir = std::env::var("ZOMBIENET_SDK_BASE_DIR")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashSet;
use std::{cell::Cell, collections::HashSet};

use codec::Encode;
use log::{debug, info};
Expand Down Expand Up @@ -381,3 +381,117 @@ async fn statement_store_crash_mid_sync() -> Result<(), anyhow::Error> {
info!("Node crash recovery test passed");
Ok(())
}

/// Test that verifies statement recovery after major sync completes
///
/// Scenario:
/// 1. Spawn charlie only and let the relay chain advance ~10 blocks
/// 2. Submit multiple statements to charlie
/// 3. Add dave as a late joiner — dave will enter major sync because the chain has already
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have anything to prove that step 3 happens?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this condition confirms it:

dave_logs.lines().any(|l| l.contains("Major sync complete, reconnecting")),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But ... this only confirms that a major sync happened — not that statements were received while the node was in major sync.

/// progressed. During major sync, `handle_sync_event` buffers charlie in `deferred_peers`
/// instead of adding it to the reserved set, so no statement substream is opened and no initial
/// sync occurs
/// 4. Wait for dave to exit major sync
/// 5. On sync-end, `drain_deferred_peers` adds charlie to the reserved set; the substream opens and
/// charlie performs initial sync with dave, delivering the statements
/// 6. Subscribe to statements on dave AFTER sync has ended and assert all statements arrive
#[tokio::test(flavor = "multi_thread")]
async fn statement_store_recovery_after_major_sync() -> Result<(), anyhow::Error> {
let _ = env_logger::try_init_from_env(
env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"),
);

const STATEMENT_COUNT: usize = 5;
let items = create_allowance_items(&[(
0,
StatementAllowance { max_count: STATEMENT_COUNT as u32, max_size: 1_000_000 },
)]);
let mut network = spawn_network_sudo(&["charlie", "alice"], items).await?;
Comment thread
DenzelPenzel marked this conversation as resolved.

let charlie = network.get_node("charlie")?;
let charlie_rpc = charlie.rpc().await?;

// Wait for at least 10 parachain blocks before dave joins.
// More blocks means the relay chain has also advanced further, giving dave a wider sync
// window and making it reliably enter major-sync mode for long enough that the statement
// handler's 100ms poll observes the true → false transition
let charlie_height = {
let h = Cell::new(0.0f64);
charlie
.wait_metric_with_timeout(
"block_height{status=\"best\"}",
|v| {
h.set(v);
v >= 10.0
},
180u64,
)
.await
.map_err(|_| anyhow::anyhow!("Charlie did not reach block 10 within 180s"))?;
h.get()
};
info!("Charlie is at block height {:.0} before dave joins", charlie_height);

let topic: Topic = [0u8; 32].into();
let keypair = get_keypair(0);
let statements: Vec<_> = (0..STATEMENT_COUNT as u32)
.map(|seq| create_test_statement(&keypair, &[topic], None, vec![seq as u8], u32::MAX, seq))
.collect();
let mut expected: Vec<Vec<u8>> = statements.iter().map(|s| s.encode()).collect();
expected.sort();

for stmt in &statements {
let result = submit_statement(&charlie_rpc, stmt).await?;
assert_eq!(result, SubmitResult::New);
}
info!("{} statements submitted to charlie", STATEMENT_COUNT);

// Add dave as a late-joining collator.
// Dave will enter major sync because the chain already advanced. During that window,
// handle_sync_event buffers charlie in deferred_peers rather than adding it to the
// reserved set, so no statement substream is opened and no initial sync fires.
// When sync ends, drain_deferred_peers adds charlie to the reserved set; the substream
// opens and charlie delivers the statements via initial sync
info!("Adding dave as late-joining collator");
let dave_join_time = std::time::Instant::now();
network.add_collator("dave", Default::default(), 1004).await?;

let dave = network.get_node("dave")?;
let dave_rpc = dave.rpc().await?;

// Wait for dave to reach charlie's block height
dave.wait_metric_with_timeout("block_height{status=\"best\"}", |h| h >= charlie_height, 120u64)
.await
.map_err(|_| {
anyhow::anyhow!("Dave did not reach block height {:.0} within 120s", charlie_height)
})?;
let sync_end = dave_join_time.elapsed();
info!("Dave reached block height {:.0} after {:.1}s", charlie_height, sync_end.as_secs_f64());

// Subscribe after sync — any statements arriving are exclusively due to
// drain_deferred_peers adding charlie to the reserved set and triggering initial sync.
// The subscription also returns statements already in dave's store as an initial batch,
// so we capture all recovered statements regardless of timing relative to the subscribe call
let mut subscription = subscribe_topic(&dave_rpc, topic).await?;
let received = expect_statements_unordered(&mut subscription, STATEMENT_COUNT, 30).await?;
let mut received_bytes: Vec<Vec<u8>> = received.into_iter().map(|b| b.to_vec()).collect();
received_bytes.sort();
assert_eq!(received_bytes, expected);
info!(
"All {} statements arrived {:.1}s after dave finished syncing",
STATEMENT_COUNT,
dave_join_time.elapsed().as_secs_f64() - sync_end.as_secs_f64()
);

// By the time all statements have arrived, drain_deferred_peers must have already fired
// and been logged (it is what triggered the initial sync from charlie that delivered them).
// Checking logs here — after statement delivery — avoids the race where the handler hasn't
// polled yet at the moment we read logs
let dave_logs = dave.logs().await?;
assert!(
dave_logs.lines().any(|l| l.contains("Major sync complete, adding")),
"drain_deferred_peers did not fire — dave may not have entered major sync or had no deferred peers"
);

Ok(())
}
19 changes: 19 additions & 0 deletions prdoc/pr_11487.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
title: 'statement-store: buffer peers during major sync for statement recovery'
doc:
- audience: Node Dev
description: |-
# Description

Nodes drop all statements received from peers during major sync because of the
`is_major_syncing()` guard. Before this fix, peers were added to the reserved set immediately
on connect, so their initial-sync batch fired while the handler was ignoring all incoming
statements — those statements were permanently lost.

This fix changes `handle_sync_event` to buffer peers that connect during major sync in a
`deferred_peers` set instead of adding them to the reserved set. When major sync ends, the
`was_major_syncing → false` transition drains the buffer: all deferred peers are added to
the reserved set in a single call, their notification substreams open, and each peer
performs a fresh initial sync that delivers the statements missed during sync.
crates:
- name: sc-network-statement
bump: patch
Loading
Loading