Gentoo Archives: gentoo-commits

From: Mike Frysinger <vapier@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/sandbox:master commit in: libsandbox/wrapper-funcs/, tests/, libsandbox/
Date: Tue, 29 Mar 2016 12:24:43
Message-Id: 1455668633.55087abd8dc9802cf68cade776fe612a3f19f6a1.vapier@gentoo
1 commit: 55087abd8dc9802cf68cade776fe612a3f19f6a1
2 Author: Mike Frysinger <vapier <AT> gentoo <DOT> org>
3 AuthorDate: Wed Feb 17 00:23:53 2016 +0000
4 Commit: Mike Frysinger <vapier <AT> gentoo <DOT> org>
5 CommitDate: Wed Feb 17 00:23:53 2016 +0000
6 URL: https://gitweb.gentoo.org/proj/sandbox.git/commit/?id=55087abd
7
8 libsandbox: use ptrace on apps that interpose their own allocator
9
10 If an app installs its own memory allocator by overriding the internal
11 glibc symbols, then we can easily hit a loop that cannot be broken: the
12 dlsym functions can attempt to allocate memory, and sandbox relies on
13 them to find the "real" functions. So when someone calls a symbol that
14 the sandbox protects, we call dlsym, and that calls malloc, which calls
15 back into the app, and their allocator might use another symbol such as
16 open ... which is protected by the sandbox. So we hit the loop like:
17 -> open -> libsandbox:open -> dlsym -> malloc -> open ->
18 libsandbox:open -> dlsym -> malloc -> ...
19
20 Change the exec checking logic to scan the ELF instead. If it exports
21 these glibc symbols, then we have to assume it can trigger a loop, so
22 scrub the sandbox environment to prevent us from being loaded. Then we
23 use the out-of-process tracer (i.e. ptrace). This should generally be
24 as robust anyways ... if it's not, that's a bug we want to fix as this
25 is the same code used for static apps.
26
27 URL: http://crbug.com/586444
28 Reported-by: Ryo Hashimoto <hashimoto <AT> chromium.org>
29 Signed-off-by: Mike Frysinger <vapier <AT> gentoo.org>
30
31 libsandbox/libsandbox.c | 72 ++++++++++-------
32 libsandbox/libsandbox.h | 2 +-
33 libsandbox/wrapper-funcs/__wrapper_exec.c | 126 ++++++++++++++++++++++++++----
34 tests/Makefile.am | 3 +
35 tests/malloc-2.sh | 4 +
36 tests/malloc.at | 1 +
37 tests/malloc_hooked_tst.c | 44 +++++++++++
38 7 files changed, 210 insertions(+), 42 deletions(-)
39
40 diff --git a/libsandbox/libsandbox.c b/libsandbox/libsandbox.c
41 index 2bcff95..7555862 100644
42 --- a/libsandbox/libsandbox.c
43 +++ b/libsandbox/libsandbox.c
44 @@ -1137,7 +1137,7 @@ typedef struct {
45 * XXX: Might be much nicer if we could serialize these vars behind the back of
46 * the program. Might be hard to handle LD_PRELOAD though ...
47 */
48 -char **sb_check_envp(char **envp, size_t *mod_cnt)
49 +char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert)
50 {
51 char **my_env;
52 char *entry;
53 @@ -1194,7 +1194,8 @@ char **sb_check_envp(char **envp, size_t *mod_cnt)
54 }
55
56 /* If we found everything, there's nothing to do! */
57 - if (num_vars == found_var_cnt)
58 + if ((insert && num_vars == found_var_cnt) ||
59 + (!insert && found_var_cnt == 0))
60 /* Use the user's envp */
61 return envp;
62
63 @@ -1217,33 +1218,50 @@ char **sb_check_envp(char **envp, size_t *mod_cnt)
64 }
65
66 my_env = NULL;
67 - if (mod_cnt) {
68 - /* Count directly due to variability with LD_PRELOAD and the value
69 - * logic below. Getting out of sync can mean memory corruption. */
70 - *mod_cnt = 0;
71 - if (unlikely(merge_ld_preload)) {
72 - str_list_add_item(my_env, ld_preload, error);
73 - (*mod_cnt)++;
74 - }
75 - for (i = 0; i < num_vars; ++i) {
76 - if (found_vars[i] || !vars[i].value)
77 - continue;
78 - str_list_add_item_env(my_env, vars[i].name, vars[i].value, error);
79 - (*mod_cnt)++;
80 - }
81 -
82 - str_list_for_each_item(envp, entry, count) {
83 - if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len)))
84 - continue;
85 - str_list_add_item(my_env, entry, error);
86 + if (!insert) {
87 + if (mod_cnt) {
88 + str_list_for_each_item(envp, entry, count) {
89 + for (i = 0; i < num_vars; ++i)
90 + if (is_env_var(entry, vars[i].name, vars[i].len)) {
91 + (*mod_cnt)++;
92 + goto skip;
93 + }
94 + str_list_add_item(my_env, entry, error);
95 + skip: ;
96 + }
97 + } else {
98 + for (i = 0; i < num_vars; ++i)
99 + unsetenv(vars[i].name);
100 }
101 } else {
102 - if (unlikely(merge_ld_preload))
103 - putenv(ld_preload);
104 - for (i = 0; i < num_vars; ++i) {
105 - if (found_vars[i] || !vars[i].value)
106 - continue;
107 - setenv(vars[i].name, vars[i].value, 1);
108 + if (mod_cnt) {
109 + /* Count directly due to variability with LD_PRELOAD and the value
110 + * logic below. Getting out of sync can mean memory corruption. */
111 + *mod_cnt = 0;
112 + if (unlikely(merge_ld_preload)) {
113 + str_list_add_item(my_env, ld_preload, error);
114 + (*mod_cnt)++;
115 + }
116 + for (i = 0; i < num_vars; ++i) {
117 + if (found_vars[i] || !vars[i].value)
118 + continue;
119 + str_list_add_item_env(my_env, vars[i].name, vars[i].value, error);
120 + (*mod_cnt)++;
121 + }
122 +
123 + str_list_for_each_item(envp, entry, count) {
124 + if (unlikely(merge_ld_preload && is_env_var(entry, vars[0].name, vars[0].len)))
125 + continue;
126 + str_list_add_item(my_env, entry, error);
127 + }
128 + } else {
129 + if (unlikely(merge_ld_preload))
130 + putenv(ld_preload);
131 + for (i = 0; i < num_vars; ++i) {
132 + if (found_vars[i] || !vars[i].value)
133 + continue;
134 + setenv(vars[i].name, vars[i].value, 1);
135 + }
136 }
137 }
138
139
140 diff --git a/libsandbox/libsandbox.h b/libsandbox/libsandbox.h
141 index 596084d..63882e7 100644
142 --- a/libsandbox/libsandbox.h
143 +++ b/libsandbox/libsandbox.h
144 @@ -56,7 +56,7 @@ void *get_dlsym(const char *symname, const char *symver);
145
146 extern char sandbox_lib[SB_PATH_MAX];
147 extern bool sandbox_on;
148 -char **sb_check_envp(char **envp, size_t *mod_cnt);
149 +char **sb_check_envp(char **envp, size_t *mod_cnt, bool insert);
150 void sb_cleanup_envp(char **envp, size_t mod_cnt);
151 extern pid_t trace_pid;
152
153
154 diff --git a/libsandbox/wrapper-funcs/__wrapper_exec.c b/libsandbox/wrapper-funcs/__wrapper_exec.c
155 index 7107c21..f7f51ab 100644
156 --- a/libsandbox/wrapper-funcs/__wrapper_exec.c
157 +++ b/libsandbox/wrapper-funcs/__wrapper_exec.c
158 @@ -20,17 +20,20 @@ static WRAPPER_RET_TYPE (*WRAPPER_TRUE_NAME)(WRAPPER_ARGS_PROTO) = NULL;
159 #ifndef SB_EXEC_COMMON
160 #define SB_EXEC_COMMON
161
162 -/* Check to see if this a static ELF and if so, protect using trace mechanisms */
163 -static void sb_check_exec(const char *filename, char *const argv[])
164 +/* Check to see if we can run this program in-process. If not, try to fall back
165 + * tracing it out-of-process via some trace mechanisms (e.g. ptrace).
166 + */
167 +static bool sb_check_exec(const char *filename, char *const argv[])
168 {
169 int fd;
170 unsigned char *elf;
171 struct stat st;
172 bool do_trace = false;
173 + bool run_in_process = true;
174
175 - fd = open(filename, O_RDONLY|O_CLOEXEC);
176 + fd = sb_unwrapped_open_DEFAULT(filename, O_RDONLY|O_CLOEXEC, 0);
177 if (fd == -1)
178 - return;
179 + return true;
180 if (fstat(fd, &st))
181 goto out_fd;
182 if (st.st_size < sizeof(Elf64_Ehdr))
183 @@ -57,19 +60,110 @@ static void sb_check_exec(const char *filename, char *const argv[])
184 * gains root just to preload libsandbox.so. That unfortunately
185 * could easily open up people to root vulns.
186 */
187 - if (getuid() == 0 || !(st.st_mode & (S_ISUID | S_ISGID))) {
188 + if (st.st_mode & (S_ISUID | S_ISGID))
189 + if (getuid() != 0)
190 + run_in_process = false;
191 +
192 + /* We also need to ptrace programs that interpose their own allocator.
193 + * http://crbug.com/586444
194 + */
195 + if (run_in_process) {
196 + static const char * const libc_alloc_syms[] = {
197 + "__libc_calloc",
198 + "__libc_free",
199 + "__libc_malloc",
200 + "__libc_realloc",
201 + "__malloc_hook",
202 + "__realloc_hook",
203 + "__free_hook",
204 + "__memalign_hook",
205 + "__malloc_initialize_hook",
206 + };
207 #define PARSE_ELF(n) \
208 ({ \
209 Elf##n##_Ehdr *ehdr = (void *)elf; \
210 Elf##n##_Phdr *phdr = (void *)(elf + ehdr->e_phoff); \
211 - uint16_t p; \
212 - if (st.st_size < sizeof(*ehdr)) \
213 - goto out_mmap; \
214 + Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0; \
215 + Elf##n##_Off offset, symoff = 0, stroff = 0; \
216 + Elf##n##_Dyn *dyn; \
217 + Elf##n##_Sym *sym; \
218 + uint##n##_t ent_size = 0, str_size = 0; \
219 + bool dynamic = false; \
220 + size_t i; \
221 + \
222 if (st.st_size < ehdr->e_phoff + ehdr->e_phentsize * ehdr->e_phnum) \
223 goto out_mmap; \
224 - for (p = 0; p < ehdr->e_phnum; ++p) \
225 - if (phdr[p].p_type == PT_INTERP) \
226 - goto done; \
227 + \
228 + /* First gather the tags we care about. */ \
229 + for (i = 0; i < ehdr->e_phnum; ++i) { \
230 + switch (phdr[i].p_type) { \
231 + case PT_INTERP: dynamic = true; break; \
232 + case PT_DYNAMIC: \
233 + dyn = (void *)(elf + phdr[i].p_offset); \
234 + while (dyn->d_tag != DT_NULL) { \
235 + switch (dyn->d_tag) { \
236 + case DT_SYMTAB: vsym = dyn->d_un.d_val; break; \
237 + case DT_SYMENT: ent_size = dyn->d_un.d_val; break; \
238 + case DT_STRTAB: vstr = dyn->d_un.d_val; break; \
239 + case DT_STRSZ: str_size = dyn->d_un.d_val; break; \
240 + } \
241 + ++dyn; \
242 + } \
243 + break; \
244 + } \
245 + } \
246 + \
247 + if (dynamic && vsym && ent_size && vstr && str_size) { \
248 + /* Figure out where in the file these tables live. */ \
249 + for (i = 0; i < ehdr->e_phnum; ++i) { \
250 + vaddr = phdr[i].p_vaddr; \
251 + filesz = phdr[i].p_filesz; \
252 + offset = phdr[i].p_offset; \
253 + if (vsym >= vaddr && vsym < vaddr + filesz) \
254 + symoff = offset + (vsym - vaddr); \
255 + if (vstr >= vaddr && vstr < vaddr + filesz) \
256 + stroff = offset + (vstr - vaddr); \
257 + } \
258 + \
259 + /* Finally walk the symbol table. This should generally be fast as \
260 + * we only look at exported symbols, and the vast majority of exes \
261 + * out there do not export any symbols at all. \
262 + */ \
263 + if (symoff && stroff) { \
264 + sym = (void *)(elf + symoff); \
265 + /* Nowhere is the # of symbols recorded, or the size of the symbol \
266 + * table. Instead, we do what glibc does: assume that the string \
267 + * table always follows the symbol table. This seems like a poor \
268 + * assumption to make, but glibc has gotten by this long. We could \
269 + * rely on DT_HASH and walking all the buckets to find the largest \
270 + * symbol index, but that's also a bit hacky. \
271 + * \
272 + * We don't sanity check the ranges here as you aren't executing \
273 + * corrupt programs in the sandbox. \
274 + */ \
275 + for (i = 0; i < (vstr - vsym) / ent_size; ++i) { \
276 + char *symname = (void *)(elf + stroff + sym->st_name); \
277 + if (ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \
278 + sym->st_shndx != SHN_UNDEF && sym->st_shndx < SHN_LORESERVE && \
279 + sym->st_name && \
280 + /* Minor optimization to avoid strcmp. */ \
281 + symname[0] == '_' && symname[1] == '_') { \
282 + /* Blacklist internal C library symbols. */ \
283 + size_t j; \
284 + for (j = 0; j < ARRAY_SIZE(libc_alloc_syms); ++j) \
285 + if (!strcmp(symname, libc_alloc_syms[j])) { \
286 + run_in_process = false; \
287 + goto use_trace; \
288 + } \
289 + } \
290 + ++sym; \
291 + } \
292 + } \
293 + \
294 + } \
295 + \
296 + if (dynamic) \
297 + goto done; \
298 })
299 if (elf[EI_CLASS] == ELFCLASS32)
300 PARSE_ELF(32);
301 @@ -78,6 +172,7 @@ static void sb_check_exec(const char *filename, char *const argv[])
302 #undef PARSE_ELF
303 }
304
305 + use_trace:
306 do_trace = trace_possible(filename, argv, elf);
307 /* Now that we're done with stuff, clean up before forking */
308
309 @@ -90,6 +185,8 @@ static void sb_check_exec(const char *filename, char *const argv[])
310
311 if (do_trace)
312 trace_main(filename, argv);
313 +
314 + return run_in_process;
315 }
316
317 #endif
318 @@ -107,6 +204,7 @@ WRAPPER_RET_TYPE SB_HIDDEN_FUNC(WRAPPER_NAME)(WRAPPER_ARGS_PROTO_FULL)
319 WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO)
320 {
321 WRAPPER_RET_TYPE result = WRAPPER_RET_DEFAULT;
322 + bool run_in_process = true;
323
324 /* The C library may implement some exec funcs by calling other
325 * exec funcs. So we might get a little sandbox recursion going
326 @@ -151,15 +249,15 @@ WRAPPER_RET_TYPE WRAPPER_NAME(WRAPPER_ARGS_PROTO)
327 if (!SB_SAFE(check_path))
328 goto done;
329
330 - sb_check_exec(check_path, argv);
331 + run_in_process = sb_check_exec(check_path, argv);
332 }
333 #endif
334
335 #ifdef EXEC_MY_ENV
336 size_t mod_cnt;
337 - char **my_env = sb_check_envp((char **)envp, &mod_cnt);
338 + char **my_env = sb_check_envp((char **)envp, &mod_cnt, run_in_process);
339 #else
340 - sb_check_envp(environ, NULL);
341 + sb_check_envp(environ, NULL, run_in_process);
342 #endif
343
344 restore_errno();
345
346 diff --git a/tests/Makefile.am b/tests/Makefile.am
347 index 943ce3b..d98898f 100644
348 --- a/tests/Makefile.am
349 +++ b/tests/Makefile.am
350 @@ -73,6 +73,7 @@ check_PROGRAMS = \
351 \
352 getcwd-gnulib_tst \
353 libsigsegv_tst \
354 + malloc_hooked_tst \
355 malloc_mmap_tst \
356 pipe-fork_tst \
357 pipe-fork_static_tst \
358 @@ -92,6 +93,8 @@ AM_LDFLAGS = `expr $@ : .*_static >/dev/null && echo -all-static`
359 sb_printf_tst_CFLAGS = -I$(top_srcdir)/libsbutil -I$(top_srcdir)/libsbutil/include
360 sb_printf_tst_LDADD = $(top_builddir)/libsbutil/libsbutil.la
361
362 +malloc_hooked_tst_LDFLAGS = $(AM_LDFLAGS) -pthread
363 +
364 if HAVE_LIBSIGSEGV
365 libsigsegv_tst_LDADD = -lsigsegv
366 endif
367
368 diff --git a/tests/malloc-2.sh b/tests/malloc-2.sh
369 new file mode 100755
370 index 0000000..e1cf297
371 --- /dev/null
372 +++ b/tests/malloc-2.sh
373 @@ -0,0 +1,4 @@
374 +#!/bin/sh
375 +# Since the malloc binary is in the target ABI, make sure the exec is
376 +# launched from the same ABI so the same libsandbox.so is used.
377 +timeout -s KILL 10 execvp-0 malloc_hooked_tst malloc_hooked_tst
378
379 diff --git a/tests/malloc.at b/tests/malloc.at
380 index 081d7d2..d364b4b 100644
381 --- a/tests/malloc.at
382 +++ b/tests/malloc.at
383 @@ -1 +1,2 @@
384 SB_CHECK(1)
385 +SB_CHECK(2)
386
387 diff --git a/tests/malloc_hooked_tst.c b/tests/malloc_hooked_tst.c
388 new file mode 100644
389 index 0000000..18737fe
390 --- /dev/null
391 +++ b/tests/malloc_hooked_tst.c
392 @@ -0,0 +1,44 @@
393 +/* Make sure programs that override malloc don't mess us up:
394 + *
395 + * libsandbox's __attribute__((constructor)) libsb_init ->
396 + * libsandbox's malloc() ->
397 + * dlsym("mmap") ->
398 + * glibc's libdl calls malloc ->
399 + * tcmalloc's internal code calls open ->
400 + * libsandbox's open wrapper is hit ->
401 + * libsandbox tries to initialize itself (since it never finished originally) ->
402 + * libsandbox's malloc() ->
403 + * dlsym() -> deadlock
404 + * http://crbug.com/586444
405 + */
406 +
407 +#include "headers.h"
408 +
409 +static void *malloc_hook(size_t size, const void *caller)
410 +{
411 + int urandom_fd = open("/dev/urandom", O_RDONLY);
412 + close(urandom_fd);
413 + return NULL;
414 +}
415 +
416 +void *(*__malloc_hook)(size_t, const void *) = &malloc_hook;
417 +
418 +static void *thread_start(void *arg)
419 +{
420 + return arg;
421 +}
422 +
423 +int main(int argc, char *argv[])
424 +{
425 + /* Make sure we reference some pthread symbols, although we don't
426 + * really want to execute it -- our malloc is limited. */
427 + if (argc < 0) {
428 + pthread_t tid;
429 + pthread_create(&tid, NULL, thread_start, NULL);
430 + }
431 +
432 + /* Trigger malloc! */
433 + if (malloc(100)) {}
434 +
435 + return 0;
436 +}