1 |
Support using a PID namespace in order to isolate the ebuild processes |
2 |
from the host system, and make it possible to kill them all easily |
3 |
(similarly to cgroups but easier to use). |
4 |
|
5 |
Bug: https://bugs.gentoo.org/659582 |
6 |
Signed-off-by: Michał Górny <mgorny@g.o> |
7 |
--- |
8 |
lib/portage/const.py | 1 + |
9 |
lib/portage/package/ebuild/doebuild.py | 8 +++-- |
10 |
lib/portage/process.py | 48 +++++++++++++++++++++++--- |
11 |
man/make.conf.5 | 7 ++++ |
12 |
4 files changed, 57 insertions(+), 7 deletions(-) |
13 |
|
14 |
diff --git a/lib/portage/const.py b/lib/portage/const.py |
15 |
index e0f93f7cc..ca66bc46e 100644 |
16 |
--- a/lib/portage/const.py |
17 |
+++ b/lib/portage/const.py |
18 |
@@ -174,6 +174,7 @@ SUPPORTED_FEATURES = frozenset([ |
19 |
"notitles", |
20 |
"parallel-fetch", |
21 |
"parallel-install", |
22 |
+ "pid-sandbox", |
23 |
"prelink-checksums", |
24 |
"preserve-libs", |
25 |
"protect-owned", |
26 |
diff --git a/lib/portage/package/ebuild/doebuild.py b/lib/portage/package/ebuild/doebuild.py |
27 |
index e84a618d2..9917ac82c 100644 |
28 |
--- a/lib/portage/package/ebuild/doebuild.py |
29 |
+++ b/lib/portage/package/ebuild/doebuild.py |
30 |
@@ -1,4 +1,4 @@ |
31 |
-# Copyright 2010-2018 Gentoo Foundation |
32 |
+# Copyright 2010-2018 Gentoo Authors |
33 |
# Distributed under the terms of the GNU General Public License v2 |
34 |
|
35 |
from __future__ import unicode_literals |
36 |
@@ -152,6 +152,7 @@ def _doebuild_spawn(phase, settings, actionmap=None, **kwargs): |
37 |
kwargs['networked'] = 'network-sandbox' not in settings.features or \ |
38 |
phase in _networked_phases or \ |
39 |
'network-sandbox' in settings['PORTAGE_RESTRICT'].split() |
40 |
+ kwargs['pidns'] = 'pid-sandbox' in settings.features |
41 |
|
42 |
if phase == 'depend': |
43 |
kwargs['droppriv'] = 'userpriv' in settings.features |
44 |
@@ -1482,7 +1483,7 @@ def _validate_deps(mysettings, myroot, mydo, mydbapi): |
45 |
# XXX Issue: cannot block execution. Deadlock condition. |
46 |
def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, |
47 |
sesandbox=False, fakeroot=False, networked=True, ipc=True, |
48 |
- mountns=False, **keywords): |
49 |
+ mountns=False, pidns=False, **keywords): |
50 |
""" |
51 |
Spawn a subprocess with extra portage-specific options. |
52 |
Optiosn include: |
53 |
@@ -1518,6 +1519,8 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, |
54 |
@type ipc: Boolean |
55 |
@param mountns: Run this command inside mount namespace |
56 |
@type mountns: Boolean |
57 |
+ @param pidns: Run this command in isolated PID namespace |
58 |
+ @type pidns: Boolean |
59 |
@param keywords: Extra options encoded as a dict, to be passed to spawn |
60 |
@type keywords: Dictionary |
61 |
@rtype: Integer |
62 |
@@ -1551,6 +1554,7 @@ def spawn(mystring, mysettings, debug=False, free=False, droppriv=False, |
63 |
keywords['unshare_net'] = not networked |
64 |
keywords['unshare_ipc'] = not ipc |
65 |
keywords['unshare_mount'] = mountns |
66 |
+ keywords['unshare_pid'] = pidns |
67 |
|
68 |
if not networked and mysettings.get("EBUILD_PHASE") != "nofetch" and \ |
69 |
("network-sandbox-proxy" in features or "distcc" in features): |
70 |
diff --git a/lib/portage/process.py b/lib/portage/process.py |
71 |
index 46868f442..dee126c3c 100644 |
72 |
--- a/lib/portage/process.py |
73 |
+++ b/lib/portage/process.py |
74 |
@@ -223,7 +223,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, |
75 |
uid=None, gid=None, groups=None, umask=None, logfile=None, |
76 |
path_lookup=True, pre_exec=None, |
77 |
close_fds=(sys.version_info < (3, 4)), unshare_net=False, |
78 |
- unshare_ipc=False, unshare_mount=False, cgroup=None): |
79 |
+ unshare_ipc=False, unshare_mount=False, unshare_pid=False, |
80 |
+ cgroup=None): |
81 |
""" |
82 |
Spawns a given command. |
83 |
|
84 |
@@ -264,6 +265,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, |
85 |
@param unshare_mount: If True, mount namespace will be unshared and mounts will |
86 |
be private to the namespace |
87 |
@type unshare_mount: Boolean |
88 |
+ @param unshare_pid: If True, PID ns will be unshared from the spawned process |
89 |
+ @type unshare_pid: Boolean |
90 |
@param cgroup: CGroup path to bind the process to |
91 |
@type cgroup: String |
92 |
|
93 |
@@ -332,7 +335,7 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, |
94 |
# This caches the libc library lookup in the current |
95 |
# process, so that it's only done once rather than |
96 |
# for each child process. |
97 |
- if unshare_net or unshare_ipc or unshare_mount: |
98 |
+ if unshare_net or unshare_ipc or unshare_mount or unshare_pid: |
99 |
find_library("c") |
100 |
|
101 |
# Force instantiation of portage.data.userpriv_groups before the |
102 |
@@ -348,7 +351,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, |
103 |
try: |
104 |
_exec(binary, mycommand, opt_name, fd_pipes, |
105 |
env, gid, groups, uid, umask, pre_exec, close_fds, |
106 |
- unshare_net, unshare_ipc, unshare_mount, cgroup) |
107 |
+ unshare_net, unshare_ipc, unshare_mount, unshare_pid, |
108 |
+ cgroup) |
109 |
except SystemExit: |
110 |
raise |
111 |
except Exception as e: |
112 |
@@ -418,7 +422,8 @@ def spawn(mycommand, env={}, opt_name=None, fd_pipes=None, returnpid=False, |
113 |
return 0 |
114 |
|
115 |
def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
116 |
- pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, cgroup): |
117 |
+ pre_exec, close_fds, unshare_net, unshare_ipc, unshare_mount, unshare_pid, |
118 |
+ cgroup): |
119 |
|
120 |
""" |
121 |
Execute a given binary with options |
122 |
@@ -450,6 +455,8 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
123 |
@param unshare_mount: If True, mount namespace will be unshared and mounts will |
124 |
be private to the namespace |
125 |
@type unshare_mount: Boolean |
126 |
+ @param unshare_pid: If True, PID ns will be unshared from the spawned process |
127 |
+ @type unshare_pid: Boolean |
128 |
@param cgroup: CGroup path to bind the process to |
129 |
@type cgroup: String |
130 |
@rtype: None |
131 |
@@ -506,7 +513,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
132 |
f.write('%d\n' % os.getpid()) |
133 |
|
134 |
# Unshare (while still uid==0) |
135 |
- if unshare_net or unshare_ipc or unshare_mount: |
136 |
+ if unshare_net or unshare_ipc or unshare_mount or unshare_pid: |
137 |
filename = find_library("c") |
138 |
if filename is not None: |
139 |
libc = LoadLibrary(filename) |
140 |
@@ -514,6 +521,7 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
141 |
# from /usr/include/bits/sched.h |
142 |
CLONE_NEWNS = 0x00020000 |
143 |
CLONE_NEWIPC = 0x08000000 |
144 |
+ CLONE_NEWPID = 0x20000000 |
145 |
CLONE_NEWNET = 0x40000000 |
146 |
|
147 |
flags = 0 |
148 |
@@ -524,6 +532,9 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
149 |
if unshare_mount: |
150 |
# NEWNS = mount namespace |
151 |
flags |= CLONE_NEWNS |
152 |
+ if unshare_pid: |
153 |
+ # we also need mount namespace for slave /proc |
154 |
+ flags |= CLONE_NEWPID | CLONE_NEWNS |
155 |
|
156 |
try: |
157 |
if libc.unshare(flags) != 0: |
158 |
@@ -531,6 +542,15 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
159 |
errno.errorcode.get(ctypes.get_errno(), '?')), |
160 |
noiselevel=-1) |
161 |
else: |
162 |
+ if unshare_pid: |
163 |
+ # pid namespace requires us to become init |
164 |
+ # TODO: do init-ty stuff |
165 |
+ # therefore, fork() ASAP |
166 |
+ fork_ret = os.fork() |
167 |
+ if fork_ret != 0: |
168 |
+ pid, status = os.waitpid(fork_ret, 0) |
169 |
+ assert pid == fork_ret |
170 |
+ os._exit(status) |
171 |
if unshare_mount: |
172 |
# mark the whole filesystem as slave to avoid |
173 |
# mounts escaping the namespace |
174 |
@@ -541,6 +561,24 @@ def _exec(binary, mycommand, opt_name, fd_pipes, env, gid, groups, uid, umask, |
175 |
# TODO: should it be fatal maybe? |
176 |
writemsg("Unable to mark mounts slave: %d\n" % (mount_ret,), |
177 |
noiselevel=-1) |
178 |
+ if unshare_pid: |
179 |
+ # we need at least /proc being slave |
180 |
+ s = subprocess.Popen(['mount', |
181 |
+ '--make-slave', '/proc']) |
182 |
+ mount_ret = s.wait() |
183 |
+ if mount_ret != 0: |
184 |
+ # can't proceed with shared /proc |
185 |
+ writemsg("Unable to mark /proc slave: %d\n" % (mount_ret,), |
186 |
+ noiselevel=-1) |
187 |
+ os._exit(1) |
188 |
+ # mount new /proc for our namespace |
189 |
+ s = subprocess.Popen(['mount', |
190 |
+ '-t', 'proc', 'proc', '/proc']) |
191 |
+ mount_ret = s.wait() |
192 |
+ if mount_ret != 0: |
193 |
+ writemsg("Unable to mount new /proc: %d\n" % (mount_ret,), |
194 |
+ noiselevel=-1) |
195 |
+ os._exit(1) |
196 |
if unshare_net: |
197 |
# 'up' the loopback |
198 |
IFF_UP = 0x1 |
199 |
diff --git a/man/make.conf.5 b/man/make.conf.5 |
200 |
index 7cb5741ad..de04e5e34 100644 |
201 |
--- a/man/make.conf.5 |
202 |
+++ b/man/make.conf.5 |
203 |
@@ -558,6 +558,13 @@ Use finer\-grained locks when installing packages, allowing for greater |
204 |
parallelization. For additional parallelization, disable |
205 |
\fIebuild\-locks\fR. |
206 |
.TP |
207 |
+.B pid\-sandbox |
208 |
+Isolate the process space for the ebuild processes. This makes it |
209 |
+possible to cleanly kill all processes spawned by the ebuild. |
210 |
+Supported only on Linux. Requires PID and mount namespace support |
211 |
+in the kernel. /proc is remounted inside the mount namespace to account |
212 |
+for the new PID namespace. |
213 |
+.TP |
214 |
.B prelink\-checksums |
215 |
If \fBprelink\fR(8) is installed then use it to undo any prelinks on files |
216 |
before computing checksums for merge and unmerge. This feature is |
217 |
-- |
218 |
2.19.1 |