Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions features.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ var featuresCommand = cli.Command{
Enabled: &t,
Schemata: &t,
},
MemoryPolicy: &features.MemoryPolicy{
Modes: specconv.KnownMemoryPolicyModes(),
Flags: specconv.KnownMemoryPolicyFlags(),
},
MountExtensions: &features.MountExtensions{
IDMap: &features.IDMap{
Enabled: &t,
Expand Down
13 changes: 13 additions & 0 deletions internal/linux/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package linux

import (
"os"
"unsafe"

"golang.org/x/sys/unix"
)
Expand Down Expand Up @@ -72,3 +73,15 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
})
return os.NewSyscallError("sendmsg", err)
}

// SetMempolicy issues the set_mempolicy system call for the calling thread.
//
// mode is passed through as the first syscall argument and mask as the second;
// the third argument (maxnode) is the size of a unix.CPUSet expressed in bits.
// The call is run through the package's retryOnEINTR helper, and any error
// that comes back is wrapped with os.NewSyscallError under the name
// "set_mempolicy" (which yields nil when there was no error).
func SetMempolicy(mode uint, mask *unix.CPUSet) error {
	// The operand of unsafe.Sizeof is not evaluated, so this is safe even
	// when mask is nil.
	maskBits := unsafe.Sizeof(*mask) * 8

	attempt := func() error {
		// The unsafe.Pointer -> uintptr conversion must stay inside the
		// argument list of the syscall itself.
		if _, _, errno := unix.Syscall(unix.SYS_SET_MEMPOLICY, uintptr(mode), uintptr(unsafe.Pointer(mask)), maskBits); errno != 0 {
			return errno
		}
		return nil
	}

	return os.NewSyscallError("set_mempolicy", retryOnEINTR(attempt))
}
12 changes: 8 additions & 4 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ type Config struct {
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`

// MemoryPolicy specifies NUMA memory policy for the container.
MemoryPolicy *LinuxMemoryPolicy `json:"memory_policy,omitempty"`

// RootlessEUID is set when the runc was launched with non-zero EUID.
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
// When RootlessEUID is set, runc creates a new userns for the container.
Expand Down Expand Up @@ -305,7 +308,8 @@ type CPUAffinity struct {
Initial, Final *unix.CPUSet
}

func toCPUSet(str string) (*unix.CPUSet, error) {
// ToCPUSet parses a string in list format into a unix.CPUSet, e.g. "0-3,5,7-9".
func ToCPUSet(str string) (*unix.CPUSet, error) {
if str == "" {
return nil, nil
}
Expand Down Expand Up @@ -356,7 +360,7 @@ func toCPUSet(str string) (*unix.CPUSet, error) {
}
}
if s.Count() == 0 {
return nil, fmt.Errorf("no CPUs found in %q", str)
return nil, fmt.Errorf("no members found in set %q", str)
}

return s, nil
Expand All @@ -367,11 +371,11 @@ func ConvertCPUAffinity(sa *specs.CPUAffinity) (*CPUAffinity, error) {
if sa == nil {
return nil, nil
}
initial, err := toCPUSet(sa.Initial)
initial, err := ToCPUSet(sa.Initial)
if err != nil {
return nil, fmt.Errorf("bad CPUAffinity.Initial: %w", err)
}
final, err := toCPUSet(sa.Final)
final, err := ToCPUSet(sa.Final)
if err != nil {
return nil, fmt.Errorf("bad CPUAffinity.Final: %w", err)
}
Expand Down
31 changes: 31 additions & 0 deletions libcontainer/configs/memorypolicy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package configs

import "golang.org/x/sys/unix"

// Memory policy modes and flags as defined in /usr/include/linux/mempolicy.h

//nolint:revive,staticcheck,nolintlint // ignore ALL_CAPS errors in consts from numaif.h, will match unix.* in the future
const (
	// Policy modes. They are numbered consecutively starting at zero, so the
	// order of this list is significant: do not reorder or insert entries
	// without checking the resulting values.
	MPOL_DEFAULT             = iota // 0
	MPOL_PREFERRED                  // 1
	MPOL_BIND                       // 2
	MPOL_INTERLEAVE                 // 3
	MPOL_LOCAL                      // 4
	MPOL_PREFERRED_MANY             // 5
	MPOL_WEIGHTED_INTERLEAVE        // 6

	// Mode flags, one bit each, listed from the lowest bit upwards. They live
	// above the range used by the modes so that a mode and any number of
	// flags can be OR-ed into a single value.
	MPOL_F_NUMA_BALANCING = 1 << 13
	MPOL_F_RELATIVE_NODES = 1 << 14
	MPOL_F_STATIC_NODES   = 1 << 15
)

// LinuxMemoryPolicy contains memory policy configuration.
//
// Mode and Flags are stored separately; they are OR-ed together into a single
// value when the policy is applied via set_mempolicy.
type LinuxMemoryPolicy struct {
	// Mode specifies memory policy mode without mode flags. See
	// set_mempolicy() documentation for details.
	//
	// Holds one of the MPOL_* mode constants. Because of omitempty, a zero
	// value (MPOL_DEFAULT) is left out of the JSON encoding.
	Mode uint `json:"mode,omitempty"`
	// Flags contains mode flags.
	//
	// This is a bitwise OR of MPOL_F_* constants; zero means no flags.
	Flags uint `json:"flags,omitempty"`
	// Nodes contains NUMA nodes to which the mode applies.
	//
	// A unix.CPUSet is reused here as the bitmask type, so set bits denote
	// NUMA node numbers rather than CPUs. May be nil.
	Nodes *unix.CPUSet `json:"nodes,omitempty"`
}
4 changes: 2 additions & 2 deletions libcontainer/configs/tocpuset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ func TestToCPUSet(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.in, func(t *testing.T) {
out, err := toCPUSet(tc.in)
t.Logf("toCPUSet(%q) = %v (error: %v)", tc.in, out, err)
out, err := ToCPUSet(tc.in)
t.Logf("ToCPUSet(%q) = %v (error: %v)", tc.in, out, err)
// Check the error.
if tc.isErr {
if err == nil {
Expand Down
24 changes: 24 additions & 0 deletions libcontainer/configs/validate/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func Validate(config *configs.Config) error {
mountsStrict,
scheduler,
ioPriority,
memoryPolicy,
}
for _, c := range checks {
if err := c(config); err != nil {
Expand Down Expand Up @@ -482,3 +483,26 @@ func ioPriority(config *configs.Config) error {

return nil
}

// memoryPolicy validates config.MemoryPolicy before it is handed to
// set_mempolicy at container start.
//
// A nil policy is valid (nothing will be applied). Otherwise:
//   - MPOL_DEFAULT and MPOL_LOCAL must not specify any nodes;
//   - MPOL_BIND, MPOL_INTERLEAVE, MPOL_PREFERRED_MANY and
//     MPOL_WEIGHTED_INTERLEAVE need at least one node;
//   - MPOL_PREFERRED accepts any number of nodes, including none;
//   - any other mode value is rejected;
//   - MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES must not both be set.
//
// It returns a descriptive error for the first rule that is violated.
func memoryPolicy(config *configs.Config) error {
	mpol := config.MemoryPolicy
	if mpol == nil {
		return nil
	}

	// A nil node set is treated the same as an empty one.
	nodes := 0
	if mpol.Nodes != nil {
		nodes = mpol.Nodes.Count()
	}

	switch mpol.Mode {
	case configs.MPOL_DEFAULT, configs.MPOL_LOCAL:
		if nodes != 0 {
			return fmt.Errorf("memory policy mode requires 0 nodes but got %d", nodes)
		}
	case configs.MPOL_BIND, configs.MPOL_INTERLEAVE,
		configs.MPOL_PREFERRED_MANY, configs.MPOL_WEIGHTED_INTERLEAVE:
		if nodes == 0 {
			return fmt.Errorf("memory policy mode requires at least one node but got 0")
		}
	case configs.MPOL_PREFERRED:
		// Zero or more nodes are allowed by the kernel.
	default:
		return fmt.Errorf("invalid memory policy mode: %d", mpol.Mode)
	}

	// set_mempolicy(2) fails with EINVAL when both of these flags are given.
	// Catch it here so the user gets a clear message at validation time
	// instead of a bare syscall error from the container init process.
	const exclusive = configs.MPOL_F_STATIC_NODES | configs.MPOL_F_RELATIVE_NODES
	if mpol.Flags&exclusive == exclusive {
		return fmt.Errorf("invalid memory policy flags %#x: MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES are mutually exclusive", mpol.Flags)
	}
	return nil
}
8 changes: 8 additions & 0 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,14 @@ func setupIOPriority(config *initConfig) error {
return nil
}

// setupMemoryPolicy applies the memory policy from config, if there is one.
//
// When config.MemoryPolicy is nil nothing is done and nil is returned.
// Otherwise the mode and its flags are merged into one value and passed,
// together with the node set, to linux.SetMempolicy, whose error is returned
// unchanged.
func setupMemoryPolicy(config *configs.Config) error {
	policy := config.MemoryPolicy
	if policy == nil {
		return nil
	}
	// The syscall wrapper takes mode and mode flags as a single argument.
	modeWithFlags := policy.Mode | policy.Flags
	return linux.SetMempolicy(modeWithFlags, policy.Nodes)
}

// setupPersonality passes the personality domain from config to
// system.SetLinuxPersonality and returns its result. config.Personality is
// dereferenced unconditionally, so callers must only invoke this when it is
// non-nil.
func setupPersonality(config *configs.Config) error {
	domain := config.Personality.Domain
	return system.SetLinuxPersonality(domain)
}
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ func (l *linuxSetnsInit) Init() error {
}
}

if err := setupMemoryPolicy(l.config.Config); err != nil {
return err
}

// Tell our parent that we're ready to exec. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
55 changes: 55 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"maps"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"sync"
Expand Down Expand Up @@ -41,6 +42,8 @@ var (
flag int
}
complexFlags map[string]func(*configs.Mount)
mpolModeMap map[string]uint
mpolModeFMap map[string]uint
)

func initMaps() {
Expand Down Expand Up @@ -148,6 +151,22 @@ func initMaps() {
m.IDMapping.Recursive = true
},
}

mpolModeMap = map[string]uint{
string(specs.MpolDefault): configs.MPOL_DEFAULT,
string(specs.MpolPreferred): configs.MPOL_PREFERRED,
string(specs.MpolBind): configs.MPOL_BIND,
string(specs.MpolInterleave): configs.MPOL_INTERLEAVE,
string(specs.MpolLocal): configs.MPOL_LOCAL,
string(specs.MpolPreferredMany): configs.MPOL_PREFERRED_MANY,
string(specs.MpolWeightedInterleave): configs.MPOL_WEIGHTED_INTERLEAVE,
}

mpolModeFMap = map[string]uint{
string(specs.MpolFStaticNodes): configs.MPOL_F_STATIC_NODES,
string(specs.MpolFRelativeNodes): configs.MPOL_F_RELATIVE_NODES,
string(specs.MpolFNumaBalancing): configs.MPOL_F_NUMA_BALANCING,
}
})
}

Expand Down Expand Up @@ -184,6 +203,20 @@ func KnownMountOptions() []string {
return res
}

// KnownMemoryPolicyModes reports the names of all memory policy modes this
// package can translate, in ascending lexical order. The names are the keys
// of the mode lookup table filled in by initMaps.
// Used by `runc features`.
func KnownMemoryPolicyModes() []string {
	initMaps()
	modes := slices.Collect(maps.Keys(mpolModeMap))
	slices.Sort(modes)
	return modes
}

// KnownMemoryPolicyFlags reports the names of all memory policy mode flags
// this package can translate, in ascending lexical order. The names are the
// keys of the mode-flag lookup table filled in by initMaps.
// Used by `runc features`.
func KnownMemoryPolicyFlags() []string {
	initMaps()
	flags := slices.Collect(maps.Keys(mpolModeFMap))
	slices.Sort(flags)
	return flags
}

// AllowedDevices is the set of devices which are automatically included for
// all containers.
//
Expand Down Expand Up @@ -468,6 +501,28 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
MemBwSchema: spec.Linux.IntelRdt.MemBwSchema,
}
}
if spec.Linux.MemoryPolicy != nil {
var ok bool
var err error
specMp := spec.Linux.MemoryPolicy
confMp := &configs.LinuxMemoryPolicy{}
confMp.Mode, ok = mpolModeMap[string(specMp.Mode)]
if !ok {
return nil, fmt.Errorf("invalid memory policy mode %q", specMp.Mode)
}
confMp.Nodes, err = configs.ToCPUSet(specMp.Nodes)
if err != nil {
return nil, fmt.Errorf("invalid memory policy nodes %q: %w", specMp.Nodes, err)
}
for _, specFlag := range specMp.Flags {
confFlag, ok := mpolModeFMap[string(specFlag)]
if !ok {
return nil, fmt.Errorf("invalid memory policy flag %q", specFlag)
}
confMp.Flags |= confFlag
}
config.MemoryPolicy = confMp
}
if spec.Linux.Personality != nil {
if len(spec.Linux.Personality.Flags) > 0 {
logrus.Warnf("ignoring unsupported personality flags: %+v because personality flag has not supported at this time", spec.Linux.Personality.Flags)
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ func (l *linuxStandardInit) Init() error {
}
}

if err := setupMemoryPolicy(l.config.Config); err != nil {
return err
}

// Tell our parent that we're ready to exec. This must be done before the
// Seccomp rules have been applied, because we need to be able to read and
// write to a socket.
Expand Down
Loading
Loading