Skip to content

Commit cecb039

Browse files
committed
nsexec: retry unshare on EINVAL
Older kernels may return EINVAL on unshare when a process is reading runc's /proc/$PID/status or /proc/$PID/maps. This was fixed by kernel commit 12c641ab8270f ("unshare: Unsharing a thread does not require unsharing a vm") in Linux v4.3. For CentOS 7, the fix was backported to CentOS 7.7 (kernel 3.10.0-1062). To work around this kernel bug, let's retry on EINVAL a few times. Reported-by: zzyyzte <[email protected]> Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent b3a68fe commit cecb039

File tree

1 file changed

+22
-8
lines changed

1 file changed

+22
-8
lines changed

libcontainer/nsenter/nsexec.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,25 @@ void send_mountsources(int sockfd, pid_t child, char *mountsources, size_t mount
833833
bail("failed to close container mount namespace fd %d", container_mntns_fd);
834834
}
835835

836+
void try_unshare(int flags, const char *msg)
837+
{
838+
write_log(DEBUG, "unshare %s", msg);
839+
/*
840+
* Kernels prior to v4.3 may return EINVAL on unshare when another process
841+
* reads runc's /proc/$PID/status or /proc/$PID/maps. To work around this,
842+
* retry on EINVAL a few times.
843+
*/
844+
int retries = 5;
845+
for (; retries > 0; retries--) {
846+
if (unshare(flags) == 0) {
847+
return;
848+
}
849+
if (errno != EINVAL)
850+
break;
851+
}
852+
bail("failed to unshare %s", msg);
853+
}
854+
836855
void nsexec(void)
837856
{
838857
int pipenum;
@@ -1171,9 +1190,7 @@ void nsexec(void)
11711190
* problem.
11721191
*/
11731192
if (config.cloneflags & CLONE_NEWUSER) {
1174-
write_log(DEBUG, "unshare user namespace");
1175-
if (unshare(CLONE_NEWUSER) < 0)
1176-
bail("failed to unshare user namespace");
1193+
try_unshare(CLONE_NEWUSER, "user namespace");
11771194
config.cloneflags &= ~CLONE_NEWUSER;
11781195

11791196
/*
@@ -1225,9 +1242,7 @@ void nsexec(void)
12251242
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
12261243
* was broken, so we'll just do it the long way anyway.
12271244
*/
1228-
write_log(DEBUG, "unshare remaining namespace (except cgroupns)");
1229-
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
1230-
bail("failed to unshare remaining namespaces (except cgroupns)");
1245+
try_unshare(config.cloneflags & ~CLONE_NEWCGROUP, "remaining namespaces (except cgroupns)");
12311246

12321247
/* Ask our parent to send the mount sources fds. */
12331248
if (config.mountsources) {
@@ -1340,8 +1355,7 @@ void nsexec(void)
13401355
}
13411356

13421357
if (config.cloneflags & CLONE_NEWCGROUP) {
1343-
if (unshare(CLONE_NEWCGROUP) < 0)
1344-
bail("failed to unshare cgroup namespace");
1358+
try_unshare(CLONE_NEWCGROUP, "cgroup namespace");
13451359
}
13461360

13471361
write_log(DEBUG, "signal completion to stage-0");

0 commit comments

Comments
 (0)