Skip to content

Commit 64dba5b

Browse files
authored
Do not update live/max gc size when unavailable (#2875)
* Do not update live/max gc size when unavailable Previously, we were updating the live/max gc memory pool(s) size for non-generational GCs on every notification. Some notifications (`Shenandoah Pauses` and `ZGC Pauses`) do not have memory pool size information included. By updating the metrics with those notifications, we were setting the live/max gauges to zero. These notifications are followed by a `Cycles` notification that contain the memory pool data. This made for a race condition between reading the live/max gc data size metrics and updating them. There is a small window during which the metrics were set to zero. This fix avoids the race by skipping updating the live/max metrics for notifications known to not have memory pool info included. * Add test for gc size getting set to zero
1 parent 3ce041a commit 64dba5b

File tree

2 files changed

+163
-66
lines changed

2 files changed

+163
-66
lines changed

micrometer-core/src/main/java/io/micrometer/core/instrument/binder/jvm/JvmGcMetrics.java

Lines changed: 98 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import io.micrometer.core.util.internal.logging.InternalLoggerFactory;
2929

3030
import javax.management.ListenerNotFoundException;
31+
import javax.management.Notification;
3132
import javax.management.NotificationEmitter;
3233
import javax.management.NotificationListener;
3334
import javax.management.openmbean.CompositeData;
@@ -73,6 +74,13 @@ public class JvmGcMetrics implements MeterBinder, AutoCloseable {
7374

7475
private final List<Runnable> notificationListenerCleanUpRunnables = new CopyOnWriteArrayList<>();
7576

77+
private Counter allocatedBytes;
78+
@Nullable
79+
private Counter promotedBytes;
80+
private AtomicLong allocationPoolSizeAfter;
81+
private AtomicLong liveDataSize;
82+
private AtomicLong maxDataSize;
83+
7684
public JvmGcMetrics() {
7785
this(emptyList());
7886
}
@@ -90,109 +98,115 @@ public JvmGcMetrics(Iterable<Tag> tags) {
9098
this.tags = tags;
9199
}
92100

101+
// VisibleForTesting
102+
GcMetricsNotificationListener gcNotificationListener;
103+
93104
@Override
94105
public void bindTo(MeterRegistry registry) {
95106
if (!this.managementExtensionsPresent) {
96107
return;
97108
}
98109

110+
gcNotificationListener = new GcMetricsNotificationListener(registry);
111+
99112
double maxLongLivedPoolBytes = getLongLivedHeapPools().mapToDouble(mem -> getUsageValue(mem, MemoryUsage::getMax)).sum();
100113

101-
AtomicLong maxDataSize = new AtomicLong((long) maxLongLivedPoolBytes);
114+
maxDataSize = new AtomicLong((long) maxLongLivedPoolBytes);
102115
Gauge.builder("jvm.gc.max.data.size", maxDataSize, AtomicLong::get)
103116
.tags(tags)
104117
.description("Max size of long-lived heap memory pool")
105118
.baseUnit(BaseUnits.BYTES)
106119
.register(registry);
107120

108-
AtomicLong liveDataSize = new AtomicLong();
121+
liveDataSize = new AtomicLong();
109122

110123
Gauge.builder("jvm.gc.live.data.size", liveDataSize, AtomicLong::get)
111124
.tags(tags)
112125
.description("Size of long-lived heap memory pool after reclamation")
113126
.baseUnit(BaseUnits.BYTES)
114127
.register(registry);
115128

116-
Counter allocatedBytes = Counter.builder("jvm.gc.memory.allocated").tags(tags)
129+
allocatedBytes = Counter.builder("jvm.gc.memory.allocated").tags(tags)
117130
.baseUnit(BaseUnits.BYTES)
118131
.description("Incremented for an increase in the size of the (young) heap memory pool after one GC to before the next")
119132
.register(registry);
120133

121-
Counter promotedBytes = (isGenerationalGc) ? Counter.builder("jvm.gc.memory.promoted").tags(tags)
134+
promotedBytes = (isGenerationalGc) ? Counter.builder("jvm.gc.memory.promoted").tags(tags)
122135
.baseUnit(BaseUnits.BYTES)
123136
.description("Count of positive increases in the size of the old generation memory pool before GC to after GC")
124137
.register(registry) : null;
125138

126-
final AtomicLong allocationPoolSizeAfter = new AtomicLong(0L);
139+
allocationPoolSizeAfter = new AtomicLong(0L);
127140

128-
for (GarbageCollectorMXBean mbean : ManagementFactory.getGarbageCollectorMXBeans()) {
129-
if (!(mbean instanceof NotificationEmitter)) {
141+
for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) {
142+
if (!(gcBean instanceof NotificationEmitter)) {
130143
continue;
131144
}
132-
NotificationListener notificationListener = (notification, ref) -> {
133-
CompositeData cd = (CompositeData) notification.getUserData();
134-
GarbageCollectionNotificationInfo notificationInfo = GarbageCollectionNotificationInfo.from(cd);
135-
136-
String gcCause = notificationInfo.getGcCause();
137-
String gcAction = notificationInfo.getGcAction();
138-
GcInfo gcInfo = notificationInfo.getGcInfo();
139-
long duration = gcInfo.getDuration();
140-
if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
141-
Timer.builder("jvm.gc.concurrent.phase.time")
142-
.tags(tags)
143-
.tags("action", gcAction, "cause", gcCause)
144-
.description("Time spent in concurrent phase")
145-
.register(registry)
146-
.record(duration, TimeUnit.MILLISECONDS);
147-
} else {
148-
Timer.builder("jvm.gc.pause")
149-
.tags(tags)
150-
.tags("action", gcAction, "cause", gcCause)
151-
.description("Time spent in GC pause")
152-
.register(registry)
153-
.record(duration, TimeUnit.MILLISECONDS);
154-
}
155-
156-
final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc();
157-
final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc();
158-
159-
countPoolSizeDelta(before, after, allocatedBytes, allocationPoolSizeAfter, allocationPoolName);
160-
161-
final long longLivedBefore = longLivedPoolNames.stream().mapToLong(pool -> before.get(pool).getUsed()).sum();
162-
final long longLivedAfter = longLivedPoolNames.stream().mapToLong(pool -> after.get(pool).getUsed()).sum();
163-
if (isGenerationalGc) {
164-
final long delta = longLivedAfter - longLivedBefore;
165-
if (delta > 0L) {
166-
promotedBytes.increment(delta);
167-
}
168-
}
169-
170-
// Some GC implementations such as G1 can reduce the old gen size as part of a minor GC. To track the
171-
// live data size we record the value if we see a reduction in the old gen heap size or
172-
// after a major GC.
173-
if (longLivedAfter < longLivedBefore || isMajorGc(notificationInfo.getGcName())) {
174-
liveDataSize.set(longLivedAfter);
175-
maxDataSize.set(longLivedPoolNames.stream().mapToLong(pool -> after.get(pool).getMax()).sum());
176-
}
177-
};
178-
NotificationEmitter notificationEmitter = (NotificationEmitter) mbean;
179-
notificationEmitter.addNotificationListener(notificationListener, notification -> notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION), null);
145+
NotificationEmitter notificationEmitter = (NotificationEmitter) gcBean;
146+
notificationEmitter.addNotificationListener(gcNotificationListener, notification -> notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION), null);
180147
notificationListenerCleanUpRunnables.add(() -> {
181148
try {
182-
notificationEmitter.removeNotificationListener(notificationListener);
149+
notificationEmitter.removeNotificationListener(gcNotificationListener);
183150
} catch (ListenerNotFoundException ignore) {
184151
}
185152
});
186153
}
187154
}
188155

189-
private boolean isGenerationalGcConfigured() {
190-
return ManagementFactory.getMemoryPoolMXBeans().stream()
191-
.filter(JvmMemory::isHeap)
192-
.map(MemoryPoolMXBean::getName)
193-
.filter(name -> !name.contains("tenured"))
194-
.count() > 1;
195-
}
156+
class GcMetricsNotificationListener implements NotificationListener {
157+
private final MeterRegistry registry;
158+
159+
public GcMetricsNotificationListener(MeterRegistry registry) {
160+
this.registry = registry;
161+
}
162+
163+
@Override
164+
public void handleNotification(Notification notification, Object ref) {
165+
CompositeData cd = (CompositeData) notification.getUserData();
166+
GarbageCollectionNotificationInfo notificationInfo = GarbageCollectionNotificationInfo.from(cd);
167+
168+
String gcCause = notificationInfo.getGcCause();
169+
String gcAction = notificationInfo.getGcAction();
170+
GcInfo gcInfo = notificationInfo.getGcInfo();
171+
long duration = gcInfo.getDuration();
172+
if (isConcurrentPhase(gcCause, notificationInfo.getGcName())) {
173+
Timer.builder("jvm.gc.concurrent.phase.time")
174+
.tags(tags)
175+
.tags("action", gcAction, "cause", gcCause)
176+
.description("Time spent in concurrent phase")
177+
.register(registry)
178+
.record(duration, TimeUnit.MILLISECONDS);
179+
} else {
180+
Timer.builder("jvm.gc.pause")
181+
.tags(tags)
182+
.tags("action", gcAction, "cause", gcCause)
183+
.description("Time spent in GC pause")
184+
.register(registry)
185+
.record(duration, TimeUnit.MILLISECONDS);
186+
}
187+
188+
final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc();
189+
final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc();
190+
191+
countPoolSizeDelta(before, after, allocatedBytes, allocationPoolSizeAfter, allocationPoolName);
192+
193+
final long longLivedBefore = longLivedPoolNames.stream().mapToLong(pool -> before.get(pool).getUsed()).sum();
194+
final long longLivedAfter = longLivedPoolNames.stream().mapToLong(pool -> after.get(pool).getUsed()).sum();
195+
if (isGenerationalGc) {
196+
final long delta = longLivedAfter - longLivedBefore;
197+
if (delta > 0L) {
198+
promotedBytes.increment(delta);
199+
}
200+
}
201+
202+
// Some GC implementations such as G1 can reduce the old gen size as part of a minor GC. To track the
203+
// live data size we record the value if we see a reduction in the long-lived heap size or
204+
// after a major/non-generational GC.
205+
if (longLivedAfter < longLivedBefore || shouldUpdateDataSizeMetrics(notificationInfo.getGcName())) {
206+
liveDataSize.set(longLivedAfter);
207+
maxDataSize.set(longLivedPoolNames.stream().mapToLong(pool -> after.get(pool).getMax()).sum());
208+
}
209+
}
196210

197211
private void countPoolSizeDelta(Map<String, MemoryUsage> before, Map<String, MemoryUsage> after, Counter counter,
198212
AtomicLong previousPoolSize, @Nullable String poolName) {
@@ -208,8 +222,27 @@ private void countPoolSizeDelta(Map<String, MemoryUsage> before, Map<String, Mem
208222
}
209223
}
210224

211-
private boolean isMajorGc(String gcName) {
212-
return !isGenerationalGc || GcGenerationAge.fromGcName(gcName) == GcGenerationAge.OLD;
225+
private boolean shouldUpdateDataSizeMetrics(String gcName) {
226+
return nonGenerationalGcShouldUpdateDataSize(gcName) || isMajorGenerationalGc(gcName);
227+
}
228+
229+
private boolean isMajorGenerationalGc(String gcName) {
230+
return GcGenerationAge.fromGcName(gcName) == GcGenerationAge.OLD;
231+
}
232+
233+
private boolean nonGenerationalGcShouldUpdateDataSize(String gcName) {
234+
return !isGenerationalGc
235+
// Skip Shenandoah and ZGC gc notifications with the name Pauses due to missing memory pool size info
236+
&& !gcName.endsWith("Pauses");
237+
}
238+
}
239+
240+
private boolean isGenerationalGcConfigured() {
241+
return ManagementFactory.getMemoryPoolMXBeans().stream()
242+
.filter(JvmMemory::isHeap)
243+
.map(MemoryPoolMXBean::getName)
244+
.filter(name -> !name.contains("tenured"))
245+
.count() > 1;
213246
}
214247

215248
private static boolean isManagementExtensionsPresent() {

micrometer-core/src/test/java/io/micrometer/core/instrument/binder/jvm/JvmGcMetricsTest.java

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,31 @@
1515
*/
1616
package io.micrometer.core.instrument.binder.jvm;
1717

18+
import com.sun.management.GarbageCollectionNotificationInfo;
1819
import com.tngtech.archunit.base.DescribedPredicate;
1920
import com.tngtech.archunit.core.domain.JavaClasses;
2021
import com.tngtech.archunit.core.importer.ClassFileImporter;
2122
import com.tngtech.archunit.lang.ArchRule;
23+
import io.micrometer.core.Issue;
2224
import io.micrometer.core.instrument.Timer;
25+
import io.micrometer.core.instrument.binder.jvm.JvmGcMetrics.GcMetricsNotificationListener;
2326
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
24-
import org.junit.jupiter.api.*;
27+
import org.junit.jupiter.api.BeforeEach;
28+
import org.junit.jupiter.api.Disabled;
29+
import org.junit.jupiter.api.Test;
2530
import org.junit.jupiter.api.condition.EnabledForJreRange;
2631
import org.junit.jupiter.api.condition.JRE;
2732

33+
import javax.management.ListenerNotFoundException;
34+
import javax.management.Notification;
35+
import javax.management.NotificationEmitter;
36+
import javax.management.NotificationListener;
2837
import java.lang.management.GarbageCollectorMXBean;
2938
import java.lang.management.ManagementFactory;
39+
import java.util.ArrayList;
40+
import java.util.Collection;
41+
import java.util.Collections;
42+
import java.util.List;
3043
import java.util.concurrent.TimeUnit;
3144

3245
import static com.tngtech.archunit.core.domain.JavaClass.Predicates.resideInAPackage;
@@ -103,6 +116,57 @@ void gcTimingIsCorrect() {
103116
checkCollectionTime(pauseTimeMs, concurrentTimeMs);
104117
}
105118

119+
@Test
120+
@Issue("gh-2872")
121+
void sizeMetricsNotSetToZero() {
122+
GcMetricsNotificationListener gcMetricsNotificationListener = binder.gcNotificationListener;
123+
NotificationCapturingListener capturingListener = new NotificationCapturingListener();
124+
Collection<Runnable> notificationListenerCleanUpRunnables = new ArrayList<>();
125+
126+
// register capturing notification listener
127+
for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) {
128+
if (!(gcBean instanceof NotificationEmitter)) {
129+
continue;
130+
}
131+
NotificationEmitter notificationEmitter = (NotificationEmitter) gcBean;
132+
notificationEmitter.addNotificationListener(capturingListener, notification -> notification.getType().equals(GarbageCollectionNotificationInfo.GARBAGE_COLLECTION_NOTIFICATION), null);
133+
notificationListenerCleanUpRunnables.add(() -> {
134+
try {
135+
notificationEmitter.removeNotificationListener(capturingListener);
136+
} catch (ListenerNotFoundException ignore) {
137+
}
138+
});
139+
}
140+
141+
try {
142+
System.gc(); // capture real gc notifications
143+
List<Notification> notifications = capturingListener.getNotifications();
144+
assertThat(notifications).isNotEmpty();
145+
// replay each notification and check size metrics not set to zero
146+
for (Notification notification : notifications) {
147+
gcMetricsNotificationListener.handleNotification(notification, null);
148+
assertThat(registry.get("jvm.gc.live.data.size").gauge().value()).isPositive();
149+
assertThat(registry.get("jvm.gc.max.data.size").gauge().value()).isPositive();
150+
}
151+
} finally {
152+
notificationListenerCleanUpRunnables.forEach(Runnable::run);
153+
}
154+
}
155+
156+
static class NotificationCapturingListener implements NotificationListener {
157+
158+
private final List<Notification> notifications = new ArrayList<>();
159+
160+
List<Notification> getNotifications() {
161+
return Collections.unmodifiableList(notifications);
162+
}
163+
164+
@Override
165+
public void handleNotification(Notification notification, Object handback) {
166+
notifications.add(notification);
167+
}
168+
}
169+
106170
private void checkPhaseCount(long expectedPauseCount, long expectedConcurrentCount) {
107171
await().atMost(200, TimeUnit.MILLISECONDS).untilAsserted(() -> {
108172
long observedPauseCount = registry.find("jvm.gc.pause").timers().stream().mapToLong(Timer::count).sum();

0 commit comments

Comments
 (0)