frame-omni-bencher: enable jemalloc-allocator (#11069)

sigurpol · github-actions[bot] · bkchr · web-flow · commit bc42349097da · 2026-02-15T23:36:37.000Z
Fix a huge benchmark regression for storage-heavy extrinsics by enabling the `jemalloc-allocator` feature of polkadot-jemalloc-shim for omni-bencher; jemalloc had been made optional in PR #10590. This closes paritytech/trie#230. Thanks @alexggh and @cheme for the help 🙇 Tested against `runtime / main` and [2.1.0](polkadot-fellows/runtimes#1065) as described [here](paritytech/trie#230 (comment)). For the `usual` extrinsic `force_apply_min_commission`, which does massive storage allocation/deallocation during benchmark setup and then just 2 reads and 1 write in the benchmark extrinsic itself, the time goes down from ms to µs. The regression was introduced by #10590 `sc-client-db: Make jemalloc optional`. ```bash runtimes git:(sigurpol-release-2_0_6) /home/paolo/github/polkadot-sdk/target/release/frame-omni-bencher v1 benchmark pallet --runtime ./target/release/wbuild/asset-hub-polkadot-runtime/asset_hub_polkadot_runtime.compact.compressed.wasm --pallet pallet_staking_async --extrinsic "force_apply_min_commission" --steps 2 --repeat 1 2026-02-13T15:06:30.145367Z INFO frame::benchmark::pallet: Initialized runtime log filter to 'INFO' 2026-02-13T15:06:31.784936Z INFO pallet_collator_selection::pallet: assembling new collators for new session 0 at #0 2026-02-13T15:06:31.784966Z INFO pallet_collator_selection::pallet: assembling new collators for new session 1 at #0 2026-02-13T15:08:29.701636Z INFO frame::benchmark::pallet: [ 0 % ] Starting benchmark: pallet_staking_async::force_apply_min_commission 2026-02-13T15:08:35.130403Z INFO frame::benchmark::pallet: [ 0 % ] Running benchmark: pallet_staking_async::force_apply_min_commission (overtime) Pallet: "pallet_staking_async", Extrinsic: "force_apply_min_commission", Lowest values: [], Highest values: [], Steps: 2, Repeat: 1 Raw Storage Info ======== Storage: `Staking::MinCommission` (r:1 w:0) Proof: `Staking::MinCommission` (`max_values`: Some(1), `max_size`: Some(4), added: 499, mode: `MaxEncodedLen`) Storage: `Staking::Validators` (r:1 w:1) Proof: 
`Staking::Validators` (`max_values`: None, `max_size`: Some(45), added: 2520, mode: `MaxEncodedLen`) Median Slopes Analysis ======== -- Extrinsic Time -- Model: Time ~= 50.31 µs Reads = 2 Writes = 1 Recorded proof Size = 564 Min Squares Analysis ======== -- Extrinsic Time -- Model: Time ~= 50.31 µs Reads = 2 Writes = 1 Recorded proof Size = 564 ``` --------- Co-authored-by: cmd[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Bastian Köcher <git@kchr.de>
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/prdoc/pr_11069.prdoc b/prdoc/pr_11069.prdoc
@@ -0,0 +1,11 @@
+title: 'frame-omni-bencher: enable jemalloc-allocator'
+doc:
+- audience: Runtime Dev
+  description: |-
+    Fix a huge benchmark regression for storage-heavy extrinsics by enabling the `jemalloc-allocator` feature of polkadot-jemalloc-shim for omni-bencher; jemalloc had been made optional in PR #10590.
+    Also revert the dummy write/read workaround at the end of commit_db() that was introduced by #10947.
+crates:
+- name: frame-omni-bencher
+  bump: patch
+- name: frame-benchmarking
+  bump: patch
diff --git a/substrate/frame/benchmarking/src/utils.rs b/substrate/frame/benchmarking/src/utils.rs
@@ -290,19 +290,6 @@ pub trait Benchmarking {
 	/// Commit pending storage changes to the trie database and clear the database cache.
 	fn commit_db(&mut self) {
 		self.commit();
-
-		// Warmup the memory allocator after bulk deallocation.
-		// After draining the overlay with many entries, the first new allocation can trigger memory
-		// defragmentation. The warmup key is whitelisted so these operations don't appear in
-		// benchmark results.
-		const WARMUP_KEY: &[u8] = b":benchmark_warmup:";
-		let mut whitelist = self.get_whitelist();
-		if !whitelist.iter().any(|k| k.key == WARMUP_KEY) {
-			whitelist.push(WARMUP_KEY.to_vec().into());
-			self.set_whitelist(whitelist);
-		}
-		self.place_storage(WARMUP_KEY.to_vec(), Some(vec![0u8; 32]));
-		self.place_storage(WARMUP_KEY.to_vec(), None);
 	}
 
 	/// Get the read/write count.
diff --git a/substrate/utils/frame/omni-bencher/Cargo.toml b/substrate/utils/frame/omni-bencher/Cargo.toml
@@ -15,11 +15,16 @@ workspace = true
 clap = { features = ["derive"], workspace = true }
 cumulus-primitives-proof-size-hostfunction = { workspace = true, default-features = true }
 frame-benchmarking-cli = { workspace = true }
+polkadot-jemalloc-shim = { workspace = true }
 sc-cli = { workspace = true, default-features = true }
 sp-runtime = { workspace = true, default-features = true }
 sp-statement-store = { workspace = true, default-features = true }
 tracing-subscriber = { workspace = true }
 
+[target.'cfg(target_os = "linux")'.dependencies]
+# Jemalloc as global allocator prevents huge benchmark regression for storage-heavy extrinsics.
+polkadot-jemalloc-shim = { workspace = true, features = ["jemalloc-allocator"] }
+
 [dev-dependencies]
 assert_cmd = { workspace = true }
 cumulus-test-runtime = { workspace = true }