88package docker
99
1010import (
11+ "context"
1112 "math"
1213 "time"
1314
@@ -16,12 +17,32 @@ import (
1617 "github.com/DataDog/datadog-agent/pkg/aggregator/sender"
1718 "github.com/DataDog/datadog-agent/pkg/collector/corechecks/containers/generic"
1819 "github.com/DataDog/datadog-agent/pkg/util/containers/metrics"
20+ "github.com/DataDog/datadog-agent/pkg/util/docker"
1921 "github.com/DataDog/datadog-agent/pkg/util/log"
2022)
2123
// cpuSharesWeightMapping identifies the formula used to convert between
// cgroup v1 CPU shares and cgroup v2 CPU weight.
type cpuSharesWeightMapping int

const (
	// mappingUnknown indicates the mapping hasn't been detected yet. It is
	// the zero value, so a freshly created extension starts in this state.
	mappingUnknown cpuSharesWeightMapping = iota
	// mappingLinear is the old linear mapping from Kubernetes/runc < 1.3.2.
	// Formula: weight = 1 + ((shares - 2) * 9999) / 262142
	mappingLinear
	// mappingNonLinear is the new quadratic mapping from runc >= 1.3.2.
	// Reference: https://github.com/opencontainers/runc/pull/4785
	mappingNonLinear
)
38+
2239type dockerCustomMetricsExtension struct {
2340 sender generic.SenderFunc
2441 aggSender sender.Sender
42+
43+ // mapping tracks which CPU shares<->weight conversion formula the runtime uses.
44+ // It's detected lazily on the first container with enough data.
45+ mapping cpuSharesWeightMapping
2546}
2647
2748func (dn * dockerCustomMetricsExtension ) PreProcess (sender generic.SenderFunc , aggSender sender.Sender ) {
@@ -76,28 +97,58 @@ func (dn *dockerCustomMetricsExtension) Process(tags []string, container *worklo
7697 // it is [1,10000].
7798 // - Even when using cgroups v2, the "docker run" command only accepts
7899 // cpu shares as a parameter. "docker inspect" also shows shares. The
79- // formulas used to convert between shares and weights are these:
80- // https://github.com/kubernetes/kubernetes/blob/release-1.28/pkg/kubelet/cm/cgroup_manager_linux.go#L565
100+ // formulas used to convert between shares and weights depend on the
101+ // runtime version:
102+ // - runc < 1.3.2 / crun < 1.23: linear mapping (old Kubernetes formula)
103+ // https://github.com/kubernetes/kubernetes/blob/release-1.28/pkg/kubelet/cm/cgroup_manager_linux.go#L565
104+ // - runc >= 1.3.2 / crun >= 1.23: quadratic mapping
105+ // https://github.com/opencontainers/runc/pull/4785
106+ // - We detect which mapping is in use by comparing the actual weight
107+ // with expected values computed from Docker's configured shares.
108+ // - The value emitted by the check with the old linear formula is not
109+ // exactly the same as in Docker because of the rounding applied in
110+ // the conversions. Example:
111+ // - Run a container with 2048 shares in a system with cgroups v2.
112+ // - The 2048 shares are converted to weight:
113+ // weight = (((shares - 2) * 9999) / 262142) + 1 = 79.04 (rounds to 79)
114+ // - This check converts the weight back to shares:
115+ // shares = (((weight - 1) * 262142) / 9999) + 2 = 2046.91 (rounds to 2047)
81116 // - Because docker shows shares everywhere regardless of the cgroup
82117 // version and "docker.cpu.shares" is a docker-specific metric, we think
83118 // that it is less confusing to always report shares to match what
84119 // the docker client reports.
85120 // - "docker inspect" reports 0 shares when the container is created
86121 // without specifying the number of shares. When that's the case, the
87122 // default applies: 1024 for shares and 100 for weight.
88- // - The value emitted by the check is not exactly the same as in
89- // Docker because of the rounding applied in the conversions. Example:
90- // - Run a container with 2048 shares in a system with cgroups v2.
91- // - The 2048 shares are converted to weight in cgroups v2:
92- // weight = (((shares - 2) * 9999) / 262142) + 1 = 79.04 (cgroups rounds to 79)
93- // - This check converts the weight to shares again to report the same as in docker:
94- // shares = (((weight - 1) * 262142) / 9999) + 2 = 2046.91 (will be rounded to 2047, instead of the original 2048).
95123
96124 var cpuShares float64
97125 if containerStats .CPU .Shares != nil {
126+ // we have the logical shares value directly from cgroups v1.
127+ //
128+ // Cgroup v1 CPU shares has a range of [2^1...2^18], i.e. [2...262144],
129+ // and the default value is 1024.
98130 cpuShares = * containerStats .CPU .Shares
99131 } else if containerStats .CPU .Weight != nil {
100- cpuShares = math .Round (cpuWeightToCPUShares (* containerStats .CPU .Weight ))
132+ // cgroups v2: we only have weight, need to convert back to shares.
133+ // First, try to detect the mapping if we haven't already.
134+ // Cgroup v2 CPU weight has a range of [10^0...10^4], i.e. [1...10000],
135+ // and the default value is 100.
136+ if dn .mapping == mappingUnknown {
137+ dn .detectMapping (container .ID , * containerStats .CPU .Weight )
138+ }
139+
140+ weight := * containerStats .CPU .Weight
141+ switch dn .mapping {
142+ case mappingLinear :
143+ // Old mapping
144+ cpuShares = math .Round (cpuWeightToSharesLinear (weight ))
145+ case mappingNonLinear :
146+ // New mapping
147+ cpuShares = math .Round (cpuWeightToSharesNonLinear (weight ))
148+ default :
149+ // Cannot determine mapping, don't emit potentially wrong metric
150+ return
151+ }
101152 }
102153
103154 // 0 is not a valid value for shares. cpuShares == 0 means that we
@@ -113,7 +164,138 @@ func (dn *dockerCustomMetricsExtension) PostProcess(tagger.Component) {
113164 // Nothing to do here
114165}
115166
116- // From https://github.com/kubernetes/kubernetes/blob/release-1.28/pkg/kubelet/cm/cgroup_manager_linux.go#L571
117- func cpuWeightToCPUShares (cpuWeight float64 ) float64 {
167+ // detectMapping attempts to detect which CPU shares<->weight mapping formula
168+ // the container runtime is using by comparing the actual weight from cgroups
169+ // with expected values computed from Docker's configured shares.
170+ func (dn * dockerCustomMetricsExtension ) detectMapping (containerID string , actualWeight float64 ) {
171+ if actualWeight == 0 {
172+ return // Can't detect without a valid weight
173+ }
174+
175+ du , err := docker .GetDockerUtil ()
176+ if err != nil {
177+ log .Debugf ("docker check: couldn't get docker util for mapping detection: %v" , err )
178+ return
179+ }
180+
181+ ctx , cancel := context .WithTimeout (context .Background (), 2 * time .Second )
182+ defer cancel ()
183+
184+ inspect , err := du .Inspect (ctx , containerID , false )
185+ if err != nil {
186+ log .Debugf ("docker check: couldn't inspect container %s for mapping detection: %v" , containerID , err )
187+ return
188+ }
189+
190+ if inspect .HostConfig == nil {
191+ return
192+ }
193+
194+ configuredShares := uint64 (inspect .HostConfig .CPUShares )
195+ // Docker returns 0 when shares weren't explicitly set, meaning "use default" (1024)
196+ if configuredShares == 0 {
197+ configuredShares = 1024
198+ }
199+
200+ weight := uint64 (actualWeight )
201+ expectedLinear := cpuSharesToWeightLinear (configuredShares )
202+ expectedNonLinear := cpuSharesToWeightNonLinear (configuredShares )
203+
204+ // Use tolerance of ±1 to handle rounding edge cases
205+ matchesLinear := absDiff (weight , expectedLinear ) <= 1
206+ matchesNonLinear := absDiff (weight , expectedNonLinear ) <= 1
207+
208+ switch {
209+ case matchesLinear && ! matchesNonLinear :
210+ dn .mapping = mappingLinear
211+ log .Debugf ("docker check: detected linear (old) shares<->weight mapping (shares=%d, weight=%d)" , configuredShares , weight )
212+ case matchesNonLinear && ! matchesLinear :
213+ dn .mapping = mappingNonLinear
214+ log .Debugf ("docker check: detected non-linear (new) shares<->weight mapping (shares=%d, weight=%d)" , configuredShares , weight )
215+ default :
216+ // Ambiguous or unknown runtime - don't set mapping, will retry detection.
217+ // This avoids emitting potentially wrong metrics.
218+ log .Debugf ("docker check: couldn't determine shares<->weight mapping (shares=%d, weight=%d, expectedLinear=%d, expectedNonLinear=%d), will retry" ,
219+ configuredShares , weight , expectedLinear , expectedNonLinear )
220+ }
221+ }
222+
// cpuSharesToWeightLinear converts CPU shares to weight using the old linear
// formula from Kubernetes/runc < 1.3.2:
//
//	weight = 1 + ((shares - 2) * 9999) / 262142
//
// The input is first clamped to [2, 262144] so the unsigned subtraction can
// never underflow; the result therefore always lies in [1, 10000]. The
// division is an integer division, i.e. the result is truncated.
//
// Reference: https://github.com/kubernetes/kubernetes/blob/release-1.28/pkg/kubelet/cm/cgroup_manager_linux.go#L565
func cpuSharesToWeightLinear(cpuShares uint64) uint64 {
	switch {
	case cpuShares < 2:
		cpuShares = 2
	case cpuShares > 262144:
		cpuShares = 262144
	}
	return 1 + ((cpuShares-2)*9999)/262142
}
234+
// cpuSharesToWeightNonLinear converts CPU shares to weight using the new
// quadratic formula from runc >= 1.3.2 / crun >= 1.23.
// This formula ensures min, max, and default values all map correctly:
//   - shares=2 (min) -> weight=1 (min)
//   - shares=1024 (default) -> weight=100 (default)
//   - shares=262144 (max) -> weight=10000 (max)
//
// Zero shares map to zero weight, values at or below the minimum map to 1 and
// values at or above the maximum map to 10000. The arithmetic is kept exactly
// as in the referenced implementation because detection relies on producing
// the same weight as the runtime.
//
// Reference: https://github.com/opencontainers/runc/pull/4785
func cpuSharesToWeightNonLinear(cpuShares uint64) uint64 {
	switch {
	case cpuShares == 0:
		return 0
	case cpuShares <= 2:
		return 1
	case cpuShares >= 262144:
		return 10000
	}

	l := math.Log2(float64(cpuShares))
	// Quadratic in log2(shares) chosen so that min, default and max line up.
	exponent := (l*l+125*l)/612.0 - 7.0/34.0
	return uint64(math.Ceil(math.Pow(10, exponent)))
}
257+
// cpuWeightToSharesLinear converts CPU weight to shares using the inverse of
// the old linear formula from Kubernetes/runc < 1.3.2:
//
//	shares = (((weight - 1) * 262142) / 9999) + 2
//
// A weight of zero or less returns 0. Any other weight is clamped to the
// range [1, 10000] before applying the formula, so the result always lies in
// [2, 262144]. The result is not rounded; callers round it as needed.
func cpuWeightToSharesLinear(cpuWeight float64) float64 {
	if cpuWeight <= 0 {
		return 0
	}
	// Clamp so that out-of-range weights cannot yield shares below the
	// minimum (or negative) or above the maximum.
	if cpuWeight < 1 {
		cpuWeight = 1
	} else if cpuWeight > 10000 {
		cpuWeight = 10000
	}
	return (((cpuWeight - 1) * 262142) / 9999) + 2
}
266+
// cpuWeightToSharesNonLinear converts CPU weight to shares using the inverse
// of the quadratic formula from runc >= 1.3.2.
//
// Forward: l = log2(shares); exponent = (l² + 125l) / 612 - 7/34; weight = ceil(10^exponent)
// (reference: https://github.com/opencontainers/cgroups/blob/fd95216684463f30144d5f5e41b6f54528feedee/utils.go#L425-L441)
//
// Inverse: solve the quadratic l² + 125l - 612*(exponent + 7/34) = 0 for l and
// return 2^l rounded to the nearest integer.
//
// Because the forward direction applies ceil(), the original 10^exponent is
// only known to lie in (weight-1, weight]. The geometric mean
// sqrt((weight-1)*weight) is used as the estimate of that value, so the result
// is an approximation of the originally configured shares.
//
// Weights of zero or less return 0, weights up to 1 return 2 and weights of
// 10000 or more return 262144.
func cpuWeightToSharesNonLinear(cpuWeight float64) float64 {
	switch {
	case cpuWeight <= 0:
		return 0
	case cpuWeight <= 1:
		return 2
	case cpuWeight >= 10000:
		return 262144
	}

	// Use geometric mean to estimate original value before ceil()
	targetValue := math.Sqrt((cpuWeight - 1) * cpuWeight)
	exponent := math.Log10(targetValue)

	// Positive root of l² + 125l - constant = 0.
	constant := 612.0 * (exponent + 7.0/34.0)
	discriminant := 125.0*125.0 + 4.0*constant
	l := (-125.0 + math.Sqrt(discriminant)) / 2.0
	return math.Round(math.Pow(2, l))
}
294+
// absDiff returns the absolute difference between two uint64 values. The
// larger operand is always the minuend, so the unsigned subtraction cannot
// wrap around.
func absDiff(a, b uint64) uint64 {
	if a > b {
		return a - b
	}
	return b - a
}
0 commit comments