Skip to content

Commit 6aef9cd

Browse files
authored
Add --attempt-cache-bust to evmtool benchmark subcommand (#8985)
This means running all the test cases within one warmup or exec iteration in an attempt to invalidate CPU caches. Requires tracking exec timings in a map and calculating the average at the end. Reduce warmup iterations to 20_000 and execution iterations to 1000, which appears to be as stable as 100_000. This helps reduce overall benchmark execution time (previously warmTime and execTime were prematurely reducing overall execution time). Use SequencedMap to indicate test case ordering matters. Signed-off-by: Simon Dudley <simon.dudley@consensys.net>
1 parent e051c95 commit 6aef9cd

File tree

12 files changed

+187
-84
lines changed

12 files changed

+187
-84
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313
- Improve transaction simulation and gas estimation when no gas pricing is present [#8888](https://github.com/hyperledger/besu/pull/8888)
1414
- Add option to trace reference tests during execution [#8878](https://github.com/hyperledger/besu/pull/8878)
1515
- Expose methods to query hardfork by block header or for the next block in the Plugin API [#8909](https://github.com/hyperledger/besu/pull/8909)
16-
- Enable decoding for large RPC requests [#8877](https://github.com/hyperledger/besu/pull/8877)
1716
- Generate distribution dependencies catalog [#8987](https://github.com/hyperledger/besu/pull/8987)
17+
18+
#### Performance
1819
- Improve the sync performance by not RLP decoding bodies during sync. This means we are using less memory and CPU, allowing us to increase the parallelism of the download pipeline, which has been increased from 4 to 8. Can be reduced again with `--Xsynchronizer-downloader-parallelism=4` [#8959](https://github.com/hyperledger/besu/pull/8959)
20+
- Enable decoding for large RPC requests [#8877](https://github.com/hyperledger/besu/pull/8877)
21+
- Add --attempt-cache-bust to evmtool benchmark subcommand [#8985](https://github.com/hyperledger/besu/pull/8985)
1922

2023
#### Fusaka devnets
2124
- EIP-7910 - `eth_config` JSON-RPC Method [#8417](https://github.com/hyperledger/besu/pull/8417), [#8946](https://github.com/hyperledger/besu/pull/8946)

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/BenchmarkSubCommand.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,14 @@ enum Benchmark {
141141
scope = LOCAL)
142142
Optional<Integer> warmTime = Optional.empty();
143143

144+
@Option(
145+
names = {"--attempt-cache-bust"},
146+
description =
147+
"Run each test case within each warmup and exec iteration. This attempts to warm the code without warming the data, i.e. avoid warming CPU caches. Benchmark must have sufficient number and variety of test cases to be effective. --warm-time, --exec-time and --async-profiler are ignored.",
148+
scope = LOCAL,
149+
negatable = true)
150+
Boolean attemptCacheBust = false;
151+
144152
@Parameters(description = "One or more of ${COMPLETION-CANDIDATES}.")
145153
EnumSet<Benchmark> benchmarks = EnumSet.noneOf(Benchmark.class);
146154

@@ -177,7 +185,8 @@ public void run() {
177185
execIterations,
178186
execTime,
179187
warmIterations,
180-
warmTime);
188+
warmTime,
189+
attemptCacheBust);
181190
for (var benchmark : benchmarksToRun) {
182191
output.println("\nBenchmarks for " + benchmark + " on fork " + parentCommand.getFork());
183192
BenchmarkExecutor executor = benchmark.executorBuilder.create(output, benchmarkConfig);

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/AltBN128Benchmark.java

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import java.io.PrintStream;
2424
import java.util.LinkedHashMap;
25-
import java.util.Map;
25+
import java.util.SequencedMap;
2626

2727
import org.apache.tuweni.bytes.Bytes;
2828

@@ -57,21 +57,36 @@ public void runBenchmark(final Boolean attemptNative, final String fork) {
5757
}
5858

5959
private void benchmarkAdd(final EvmSpecVersion forkVersion) {
60-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
60+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
6161
testCases.put(
62-
"add",
62+
"EcAdd",
6363
Bytes.fromHexString(
6464
"17c139df0efee0f766bc0204762b774362e4ded88953a39ce849a8a7fa163fa9"
6565
+ "01e0559bacb160664764a357af8a9fe70baa9258e0b959273ffc5718c6d4cc7c"
6666
+ "17c139df0efee0f766bc0204762b774362e4ded88953a39ce849a8a7fa163fa9"
6767
+ "2e83f8d734803fc370eba25ed1f6b8768bd6d83887b87165fc2434fe11a830cb"));
6868
testCases.put(
69-
"AddMarius",
69+
"EcAddMarius",
7070
Bytes.fromHexString(
7171
"1581d9d4d7eb0e3cdc75739f7098479097d579573f23e70b07cbd40a"
7272
+ "5d97bbc118da69b1d2cb7b89fdb3bba2524bf135453ce828faea190d8f4d48d572cbe3fe20"
7373
+ "34b5942fbdd612f2553a9fd9fa5eb4c3e5ce6a34ed100f62de0e380f4e67f8016027893e1f"
7474
+ "5082bdc48651667776e2b1e32a85edd33ff430eef15e8e68b3d460"));
75+
testCases.put(
76+
"EcAddAmez1",
77+
Bytes.fromHexString(
78+
"1f69350cfea1bdf51ecec3a0bdf57732411418502904eb86c10901af310b9ca0291547261fc8bbcf961534f3f3d282d4ab3bfa0b9f3b5ecb3e575d42fd13e25c"
79+
+ "2034b5942fbdd612f2553a9fd9fa5eb4c3e5ce6a34ed100f62de0e380f4e67f8016027893e1f5082bdc48651667776e2b1e32a85edd33ff430eef15e8e68b3d4"));
80+
testCases.put(
81+
"EcAddAmez2",
82+
Bytes.fromHexString(
83+
"03adc0948243eac87e40fd1d4a3b5693c651abd30a88b93f3dbe3f571791c4120e9f7fa4c9ae503399c7a4f0ee0e91961b759d94e49c8ea0d04b448dbc6f3d16"
84+
+ "2034b5942fbdd612f2553a9fd9fa5eb4c3e5ce6a34ed100f62de0e380f4e67f8016027893e1f5082bdc48651667776e2b1e32a85edd33ff430eef15e8e68b3d4"));
85+
testCases.put(
86+
"EcAddAmez3",
87+
Bytes.fromHexString(
88+
"012b7638324563dc328b870d414807ed27426354bc83f22c42690f717ae7137e19dffc74cd0183d631cb39f5f58d2846744119913519373ebb95f2654a989390"
89+
+ "2034b5942fbdd612f2553a9fd9fa5eb4c3e5ce6a34ed100f62de0e380f4e67f8016027893e1f5082bdc48651667776e2b1e32a85edd33ff430eef15e8e68b3d4"));
7590

7691
PrecompiledContract contract =
7792
EvmSpec.evmSpec(forkVersion).getPrecompileContractRegistry().get(Address.ALTBN128_ADD);
@@ -80,7 +95,7 @@ private void benchmarkAdd(final EvmSpecVersion forkVersion) {
8095
}
8196

8297
private void benchmarkMul(final EvmSpecVersion forkVersion) {
83-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
98+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
8499
testCases.put(
85100
"mul1",
86101
Bytes.fromHexString(
@@ -102,7 +117,7 @@ private void benchmarkMul(final EvmSpecVersion forkVersion) {
102117
}
103118

104119
private void benchmarkPairings(final EvmSpecVersion forkVersion) {
105-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
120+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
106121
testCases.put(
107122
"2 pairings",
108123
Bytes.fromHexString(

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/BLS12Benchmark.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import java.io.PrintStream;
2424
import java.util.LinkedHashMap;
25-
import java.util.Map;
25+
import java.util.SequencedMap;
2626

2727
import org.apache.tuweni.bytes.Bytes;
2828

@@ -172,7 +172,7 @@ public void runBenchmark(final Boolean attemptNative, final String fork) {
172172
}
173173

174174
private void benchmarkG1Add(final EvmSpecVersion forkVersion) {
175-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
175+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
176176
for (int i = 0; i < g1PointPairs.length - 1; i++) {
177177
testCases.put("G1 Add " + i, Bytes.fromHexString(g1PointPairs[i] + g1PointPairs[i + 1]));
178178
}
@@ -184,7 +184,7 @@ private void benchmarkG1Add(final EvmSpecVersion forkVersion) {
184184
}
185185

186186
private void benchmarkG1MultiExp32Pairs(final EvmSpecVersion forkVersion) {
187-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
187+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
188188

189189
// add test cases for 2, 4, 8, 16, and 32 point/scalar pairs
190190
for (int i = 1; i <= 5; i++) {
@@ -202,7 +202,7 @@ private void benchmarkG1MultiExp32Pairs(final EvmSpecVersion forkVersion) {
202202
}
203203

204204
private void benchmarkMapFpToG1(final EvmSpecVersion forkVersion) {
205-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
205+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
206206
for (int i = 0; i < g1PointPairs.length; i++) {
207207
testCases.put("Map Fp to G1 " + i, Bytes.fromHexString(g1PointPairs[i].substring(0, 128)));
208208
}
@@ -216,7 +216,7 @@ private void benchmarkMapFpToG1(final EvmSpecVersion forkVersion) {
216216
}
217217

218218
private void benchmarkG2Add(final EvmSpecVersion forkVersion) {
219-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
219+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
220220
for (int i = 0; i < g2PointPairs.length - 1; i++) {
221221
testCases.put("G2 Add " + i, Bytes.fromHexString(g2PointPairs[i] + g2PointPairs[i + 1]));
222222
}
@@ -228,7 +228,7 @@ private void benchmarkG2Add(final EvmSpecVersion forkVersion) {
228228
}
229229

230230
private void benchmarkG2MultiExp32Pairs(final EvmSpecVersion forkVersion) {
231-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
231+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
232232

233233
// add test cases for 2, 4, 8, 16, and 32 point/scalar pairs
234234
for (int i = 1; i <= 5; i++) {
@@ -246,7 +246,7 @@ private void benchmarkG2MultiExp32Pairs(final EvmSpecVersion forkVersion) {
246246
}
247247

248248
private void benchmarkMapFp2ToG2(final EvmSpecVersion forkVersion) {
249-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
249+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
250250
for (int i = 0; i < g2PointPairs.length; i++) {
251251
testCases.put("Map Fp2 to G2 " + i, Bytes.fromHexString(g2PointPairs[i].substring(0, 256)));
252252
}
@@ -260,7 +260,7 @@ private void benchmarkMapFp2ToG2(final EvmSpecVersion forkVersion) {
260260
}
261261

262262
private void benchmarkBlsPairing(final EvmSpecVersion forkVersion) {
263-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
263+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
264264

265265
// add test cases for 2, 4, 8, 16, and 32 point/scalar pairs
266266
for (int i = 1; i <= 5; i++) {

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/BenchmarkConfig.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
* @param execTime run for an unbounded amount of time.
2828
* @param warmIterations warm up for an unbounded number of iterations.
2929
* @param warmTime warm up for an unbounded amount of time.
30+
* @param attemptCacheBust if true, run each test case within each iteration
3031
*/
3132
public record BenchmarkConfig(
3233
boolean useNative,
@@ -36,4 +37,5 @@ public record BenchmarkConfig(
3637
Optional<Integer> execIterations,
3738
Optional<Integer> execTime,
3839
Optional<Integer> warmIterations,
39-
Optional<Integer> warmTime) {}
40+
Optional<Integer> warmTime,
41+
boolean attemptCacheBust) {}

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/BenchmarkExecutor.java

Lines changed: 88 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,12 @@
3838
import org.hyperledger.besu.evm.precompile.PrecompiledContract;
3939

4040
import java.io.PrintStream;
41+
import java.util.HashMap;
42+
import java.util.LinkedHashMap;
4143
import java.util.Locale;
4244
import java.util.Map;
45+
import java.util.Optional;
46+
import java.util.SequencedMap;
4347
import java.util.concurrent.TimeUnit;
4448
import java.util.concurrent.atomic.AtomicReference;
4549
import java.util.regex.Pattern;
@@ -54,8 +58,8 @@ public abstract class BenchmarkExecutor {
5458
private static final int MAX_WARMUP_TIME_IN_SECONDS = 3;
5559
private static final long GAS_PER_SECOND_STANDARD = 100_000_000L;
5660

57-
static final int MATH_WARMUP = 100_000;
58-
static final int MATH_ITERATIONS = 100_000;
61+
static final int MATH_WARMUP = 20_000;
62+
static final int MATH_ITERATIONS = 1_000;
5963

6064
/** Where to write the output of the benchmarks. */
6165
protected final PrintStream output;
@@ -129,10 +133,19 @@ public BenchmarkExecutor(
129133
: Integer.MAX_VALUE));
130134
this.output = output;
131135
this.precompileTableHeader =
132-
() ->
133-
output.printf(
134-
"%-30s | %12s | %12s | %15s | %15s%n",
135-
"", "Actual cost", "Derived Cost", "Iteration time", "Throughput");
136+
() -> {
137+
if (benchmarkConfig.attemptCacheBust())
138+
output.println("--attempt-cache-bust=true (--warm-time and --exec-time ignored)");
139+
output.printf("--warm-iterations=%d%n", warmIterations);
140+
if (!benchmarkConfig.attemptCacheBust())
141+
output.printf("--warm-time=%ss%n", warmTimeInNano / 1.0e9D);
142+
output.printf("--exec-iterations=%d%n", execIterations);
143+
if (!benchmarkConfig.attemptCacheBust())
144+
output.printf("--exec-time=%ss%n", execTimeInNano / 1.0e9D);
145+
output.printf(
146+
"%-30s | %12s | %12s | %15s | %15s%n",
147+
"", "Actual cost", "Derived Cost", "Iteration time", "Throughput");
148+
};
136149
this.config = benchmarkConfig;
137150
assert warmIterations <= 0;
138151
assert execIterations <= 0;
@@ -147,7 +160,7 @@ public BenchmarkExecutor(
147160
* @param evmSpecVersion EVM specification version to run the precompile for.
148161
*/
149162
public void precompile(
150-
final Map<String, Bytes> testCases,
163+
final SequencedMap<String, Bytes> testCases,
151164
final PrecompiledContract contract,
152165
final EvmSpecVersion evmSpecVersion) {
153166

@@ -156,15 +169,71 @@ public void precompile(
156169
"contract is unsupported on " + evmSpecVersion + " fork");
157170
}
158171

172+
Optional<Pattern> maybePattern = config.testCasePattern().map(Pattern::compile);
173+
LinkedHashMap<String, Bytes> filteredTestCases = new LinkedHashMap<>();
174+
testCases.forEach(
175+
(k, v) -> {
176+
if (maybePattern.map(p -> p.matcher(k).find()).orElse(true)) {
177+
filteredTestCases.put(k, v);
178+
}
179+
});
180+
181+
if (config.attemptCacheBust()) {
182+
runPrecompileAttemptCacheBust(filteredTestCases, contract);
183+
} else {
184+
runPrecompile(filteredTestCases, contract);
185+
}
186+
}
187+
188+
private void runPrecompileAttemptCacheBust(
189+
final Map<String, Bytes> testCases, final PrecompiledContract contract) {
190+
191+
// Warmup all test cases in serial inside one warmup iteration
192+
// avoid using warmTime as it is now dependent on the number of test cases
193+
for (int i = 0; i < warmIterations; i++) {
194+
for (final Map.Entry<String, Bytes> testCase : testCases.entrySet()) {
195+
contract.computePrecompile(testCase.getValue(), fakeFrame);
196+
}
197+
}
198+
199+
// Also run all test cases in serial inside one iteration
200+
Map<String, Long> totalElapsedByTestName = new HashMap<>();
201+
int executions = 0;
202+
while (executions < execIterations) {
203+
for (final Map.Entry<String, Bytes> testCase : testCases.entrySet()) {
204+
final long iterationStart = System.nanoTime();
205+
final var result = contract.computePrecompile(testCase.getValue(), fakeFrame);
206+
final long iterationElapsed = System.nanoTime() - iterationStart;
207+
if (result.output() != null) {
208+
// adds iterationElapsed if absent, or sums with existing value
209+
totalElapsedByTestName.merge(testCase.getKey(), iterationElapsed, Long::sum);
210+
} else {
211+
throw new IllegalArgumentException("Input is Invalid for " + testCase.getValue());
212+
}
213+
}
214+
executions++;
215+
}
216+
159217
for (final Map.Entry<String, Bytes> testCase : testCases.entrySet()) {
160-
if (config.testCasePattern().isPresent()
161-
&& !Pattern.compile(config.testCasePattern().get()).matcher(testCase.getKey()).find()) {
162-
continue;
218+
if (totalElapsedByTestName.containsKey(testCase.getKey())) {
219+
final double execTime =
220+
totalElapsedByTestName.get(testCase.getKey()) / 1.0e9D / execIterations;
221+
// log the performance of the precompile
222+
long gasCost = contract.gasRequirement(testCases.get(testCase.getKey()));
223+
logPrecompilePerformance(testCase.getKey(), gasCost, execTime);
224+
} else {
225+
output.printf("%s Test case missing from results%n", testCase.getKey());
163226
}
227+
}
228+
}
164229

230+
private void runPrecompile(
231+
final Map<String, Bytes> testCases, final PrecompiledContract contract) {
232+
233+
// Fully warmup and execute, test case by test case
234+
for (final Map.Entry<String, Bytes> testCase : testCases.entrySet()) {
165235
final double execTime =
166236
runPrecompileBenchmark(testCase.getKey(), testCase.getValue(), contract);
167-
168237
long gasCost = contract.gasRequirement(testCase.getValue());
169238
logPrecompilePerformance(testCase.getKey(), gasCost, execTime);
170239
}
@@ -181,14 +250,18 @@ public void precompile(
181250
protected double runPrecompileBenchmark(
182251
final String testName, final Bytes arg, final PrecompiledContract contract) {
183252
if (contract.computePrecompile(arg, fakeFrame).output() == null) {
184-
throw new RuntimeException("Input is Invalid");
253+
throw new IllegalArgumentException("Input is Invalid for " + testName);
185254
}
186255

187-
long startNanoTime = System.nanoTime();
188-
for (int i = 0; i < warmIterations && System.nanoTime() - startNanoTime < warmTimeInNano; i++) {
256+
// Warmup individual test case fully, which may have side effect of warming cpu caches
257+
long startWarmNanoTime = System.nanoTime();
258+
for (int i = 0;
259+
i < warmIterations && System.nanoTime() - startWarmNanoTime < warmTimeInNano;
260+
i++) {
189261
contract.computePrecompile(arg, fakeFrame);
190262
}
191263

264+
// Iterations
192265
final AtomicReference<AsyncProfiler> asyncProfiler = new AtomicReference<>();
193266
config
194267
.asyncProfilerOptions()
@@ -206,7 +279,6 @@ protected double runPrecompileBenchmark(
206279

207280
int executions = 0;
208281
long totalElapsed = 0;
209-
210282
while (executions < execIterations && totalElapsed < execTimeInNano) {
211283
long iterationStart = System.nanoTime();
212284
contract.computePrecompile(arg, fakeFrame);
@@ -224,7 +296,7 @@ protected double runPrecompileBenchmark(
224296
}
225297
}
226298

227-
return (totalElapsed / 1.0e9D) / executions;
299+
return totalElapsed / 1.0e9D / executions;
228300
}
229301

230302
/**

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/ECRecoverBenchmark.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.io.PrintStream;
2525
import java.util.LinkedHashMap;
2626
import java.util.Map;
27+
import java.util.SequencedMap;
2728

2829
import org.apache.tuweni.bytes.Bytes;
2930

@@ -44,7 +45,7 @@ public ECRecoverBenchmark(final PrintStream output, final BenchmarkConfig benchm
4445
public void runBenchmark(final Boolean attemptNative, final String fork) {
4546
EvmSpecVersion evmSpecVersion = EvmSpecVersion.fromName(fork);
4647

47-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
48+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
4849
testCases.put(
4950
"0x0c65a9d9ffc02c7c99e36e32ce0f950c7804ceda",
5051
Bytes.fromHexString(

ethereum/evmtool/src/main/java/org/hyperledger/besu/evmtool/benchmarks/KZGPointEvalBenchmark.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import java.io.PrintStream;
2424
import java.util.LinkedHashMap;
25-
import java.util.Map;
25+
import java.util.SequencedMap;
2626

2727
import org.apache.tuweni.bytes.Bytes;
2828

@@ -47,7 +47,7 @@ public void runBenchmark(final Boolean attemptNative, final String fork) {
4747
}
4848
output.println("Native KZGPointEval");
4949

50-
final Map<String, Bytes> testCases = new LinkedHashMap<>();
50+
final SequencedMap<String, Bytes> testCases = new LinkedHashMap<>();
5151
testCases.put(
5252
"kzg-verify",
5353
Bytes.fromHexString(

0 commit comments

Comments
 (0)